Compare commits
No commits in common. "70e6e6ca902e09582bdc012b3b5549133a3ada07" and "cb5f504d31ca93c564babd78183ac1c5e4e099ec" have entirely different histories.
70e6e6ca90
...
cb5f504d31
23
Dockerfile
23
Dockerfile
@ -1,23 +1,20 @@
|
|||||||
FROM python:3.10-slim
|
FROM python:3.10-slim
|
||||||
|
|
||||||
ENV FLASK_APP=src/app.py
|
|
||||||
ARG PIP_REQ_FILE=requirements.txt
|
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
RUN apt update && apt install git ffmpeg -y && \
|
COPY requirements.txt /app
|
||||||
pip3 install "git+https://github.com/openai/whisper.git"
|
RUN apt-get update && apt-get install git -y
|
||||||
|
RUN pip3 install -r requirements.txt
|
||||||
|
RUN pip3 install "git+https://github.com/openai/whisper.git"
|
||||||
|
RUN apt-get install -y ffmpeg
|
||||||
|
|
||||||
RUN whisper --model medium --language ru dummy.wav; exit 0 && \
|
RUN whisper --model medium --language ru dummy.wav; exit 0
|
||||||
whisper --model small --language ru dummy.wav; exit 0
|
RUN whisper --model small --language ru dummy.wav; exit 0
|
||||||
|
|
||||||
COPY src/ src/
|
COPY . .
|
||||||
|
|
||||||
# Separate requirements installation to keep other dependencies
|
|
||||||
# in cache
|
|
||||||
COPY ${PIP_REQ_FILE} ${PIP_REQ_FILE}
|
|
||||||
RUN pip3 install -r ${PIP_REQ_FILE}
|
|
||||||
|
|
||||||
EXPOSE 5000
|
EXPOSE 5000
|
||||||
|
|
||||||
|
ENV FLASK_APP=src/app.py
|
||||||
|
|
||||||
CMD [ "python3", "-m" , "flask", "run", "--host=0.0.0.0"]
|
CMD [ "python3", "-m" , "flask", "run", "--host=0.0.0.0"]
|
7
Makefile
7
Makefile
@ -1,7 +0,0 @@
|
|||||||
# Start the Flask development server with the PyTorch/CUDA debugging
# environment used by this project.
#
# PYTORCH_CUDA_ALLOC_CONF used to be assigned twice in this command prefix
# (expandable_segments:True, then max_split_size_mb:128). The later
# assignment overrides the earlier one, so only max_split_size_mb:128 ever
# took effect; that effective value is kept here.
# NOTE(review): if expandable segments were intended as well, combine both
# options in a single comma-separated value — confirm against PyTorch docs.
.PHONY: run
run:
	PYTORCH_NO_CUDA_MEMORY_CACHING=1 \
	CUDA_LAUNCH_BLOCKING=1 \
	FLASK_APP=src/app.py \
	PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128 \
	flask run --host=0.0.0.0
|
|
@ -1,62 +1,8 @@
|
|||||||
asgiref==3.7.2
|
flask==3.0.2
|
||||||
av==11.0.0
|
|
||||||
blinker==1.7.0
|
|
||||||
certifi==2024.2.2
|
|
||||||
charset-normalizer==3.3.2
|
|
||||||
click==8.1.7
|
|
||||||
coloredlogs==15.0.1
|
|
||||||
ctranslate2==4.0.0
|
|
||||||
Cython==3.0.8
|
|
||||||
dtw-python==1.3.1
|
|
||||||
faster-whisper==1.0.0
|
|
||||||
filelock==3.13.1
|
|
||||||
Flask==3.0.2
|
|
||||||
flatbuffers==23.5.26
|
|
||||||
fsspec==2024.2.0
|
|
||||||
huggingface-hub==0.21.3
|
|
||||||
humanfriendly==10.0
|
|
||||||
idna==3.6
|
|
||||||
itsdangerous==2.1.2
|
|
||||||
Jinja2==3.1.3
|
Jinja2==3.1.3
|
||||||
llvmlite==0.42.0
|
blinker==1.7.0
|
||||||
MarkupSafe==2.1.5
|
|
||||||
more-itertools==10.2.0
|
|
||||||
mpmath==1.3.0
|
|
||||||
networkx==3.2.1
|
|
||||||
numba==0.59.0
|
|
||||||
numpy==1.26.4
|
|
||||||
nvidia-cublas-cu12==12.1.3.1
|
|
||||||
nvidia-cuda-cupti-cu12==12.1.105
|
|
||||||
nvidia-cuda-nvrtc-cu12==12.1.105
|
|
||||||
nvidia-cuda-runtime-cu12==12.1.105
|
|
||||||
nvidia-cudnn-cu12==8.9.2.26
|
|
||||||
nvidia-cufft-cu12==11.0.2.54
|
|
||||||
nvidia-curand-cu12==10.3.2.106
|
|
||||||
nvidia-cusolver-cu12==11.4.5.107
|
|
||||||
nvidia-cusparse-cu12==12.1.0.106
|
|
||||||
nvidia-nccl-cu12==2.19.3
|
|
||||||
nvidia-nvjitlink-cu12==12.3.101
|
|
||||||
nvidia-nvtx-cu12==12.1.105
|
|
||||||
onnxruntime==1.17.1
|
|
||||||
openai-whisper @ git+https://github.com/openai/whisper.git@ba3f3cd54b0e5b8ce1ab3de13e32122d0d5f98ab
|
|
||||||
packaging==23.2
|
|
||||||
pillow==10.2.0
|
|
||||||
protobuf==4.25.3
|
|
||||||
python-dotenv==1.0.1
|
|
||||||
PyYAML==6.0.1
|
|
||||||
regex==2023.12.25
|
|
||||||
requests==2.31.0
|
|
||||||
scipy==1.12.0
|
|
||||||
six==1.16.0
|
|
||||||
sympy==1.12
|
|
||||||
tiktoken==0.6.0
|
|
||||||
tokenizers==0.15.2
|
|
||||||
torch==2.2.1
|
|
||||||
torchaudio==2.2.1
|
|
||||||
torchvision==0.17.1
|
|
||||||
tqdm==4.66.2
|
|
||||||
triton==2.2.0
|
|
||||||
typing_extensions==4.10.0
|
|
||||||
urllib3==2.2.1
|
|
||||||
Werkzeug==3.0.1
|
Werkzeug==3.0.1
|
||||||
whisper-timestamped==1.15.0
|
click==8.1.7
|
||||||
|
itsdangerous==2.1.2
|
||||||
|
MarkupSafe==2.1.5
|
||||||
|
python-dotenv==1.0.1
|
||||||
|
84
src/app.py
84
src/app.py
@ -1,59 +1,67 @@
|
|||||||
from flask import Flask, abort, request
|
from flask import Flask, abort, request
|
||||||
from tempfile import NamedTemporaryFile
|
from tempfile import NamedTemporaryFile
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
import os
|
||||||
|
import whisper
|
||||||
|
import torch
|
||||||
import sys
|
import sys
|
||||||
|
import re
|
||||||
|
|
||||||
import config
|
load_dotenv()
|
||||||
|
|
||||||
from queue_stack import QueueStack
|
HARPYIA_PROMPT = os.getenv('HARPYIA_PROMPT') or 'спасите помогите на помощь пожар'
|
||||||
from queue_stack.strategies import RecognizeAndSendStrategy
|
HARPYIA_MODEL = os.getenv('HARPYIA_MODEL') or 'medium'
|
||||||
|
HARPYIA_LANGUAGE = os.getenv('HARPYIA_LANGUAGE') or 'ru'
|
||||||
|
|
||||||
from recognizer import Recognizer
|
# Check if NVIDIA GPU is available
|
||||||
from recognizer.strategies import WhisperStrategy, FastWhisperStrategy
|
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
||||||
|
|
||||||
from message import MessageService
|
# Load the Whisper model:
|
||||||
from message.strategies import SosMessageStrategy, NumberMessageStrategy
|
model = whisper.load_model(HARPYIA_MODEL, device=DEVICE)
|
||||||
|
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
|
|
||||||
whisper_recognizer = Recognizer(WhisperStrategy())
|
|
||||||
fast_whisper_recognizer = Recognizer(FastWhisperStrategy())
|
|
||||||
|
|
||||||
sos_message_service = MessageService(SosMessageStrategy())
|
|
||||||
number_message_service = MessageService(NumberMessageStrategy())
|
|
||||||
|
|
||||||
queue_stack = QueueStack(RecognizeAndSendStrategy())
|
|
||||||
queue_stack.start_loop_in_thread()
|
|
||||||
|
|
||||||
@app.route("/")
|
@app.route("/")
|
||||||
def hello():
|
def hello():
|
||||||
return "To recognize an audio file, upload it using a POST request with '/recognize' or '/recognize-number' route."
|
return "To recognize an audio file, upload it using a POST request with '/recognize' or '/recognize_number' route."
|
||||||
|
|
||||||
def recognize_files(message_service: MessageService):
|
def recognize_files(handler_fn):
|
||||||
if not request.files:
|
if not request.files:
|
||||||
abort(400)
|
abort(400)
|
||||||
|
|
||||||
results = []
|
results = []
|
||||||
|
|
||||||
for filename, handle in request.files.items():
|
for filename, handle in request.files.items():
|
||||||
temp = NamedTemporaryFile()
|
temp = NamedTemporaryFile()
|
||||||
handle.save(temp)
|
handle.save(temp)
|
||||||
|
result = model.transcribe(temp.name, language=HARPYIA_LANGUAGE, initial_prompt=HARPYIA_PROMPT)
|
||||||
|
results.append({
|
||||||
|
'filename': filename,
|
||||||
|
'transcript': handler_fn(result['text']),
|
||||||
|
})
|
||||||
|
|
||||||
results.append(queue_stack.append_and_await((
|
print(results, file=sys.stderr)
|
||||||
temp,
|
return {'results': results}
|
||||||
whisper_recognizer,
|
|
||||||
message_service,
|
|
||||||
config.HARPYIA_LANGUAGE,
|
|
||||||
message_service.get_prompt()
|
|
||||||
)))
|
|
||||||
|
|
||||||
print(results, file=sys.stderr)
|
|
||||||
return {'results': results}
|
|
||||||
|
|
||||||
@app.route('/recognize', methods=['POST'])
|
@app.route('/recognize', methods=['POST'])
|
||||||
def recognize():
|
def recognize():
|
||||||
return recognize_files(sos_message_service)
|
return recognize_files(lambda text: text)
|
||||||
|
|
||||||
@app.route('/recognize-number', methods=['POST'])
|
@app.route('/recognize_number', methods=['POST'])
|
||||||
def recognize_number():
|
def recognize_number():
|
||||||
return recognize_files(number_message_service)
|
return recognize_files(transfer_and_clean)
|
||||||
|
|
||||||
|
def transfer_and_clean(input_string):
    """Turn spelled-out Russian numerals into digits and drop everything else.

    The words "один", "два" and "три" are replaced by "1", "2" and "3";
    afterwards every non-digit character is removed, so the result contains
    digits only (possibly the empty string).

    Matching is case-insensitive: the text is lower-cased first so that a
    capitalised word such as "Один" is mapped instead of being discarded.

    Args:
        input_string: Recognised text to convert.

    Returns:
        A string made only of the digits found in, or mapped from, the input.
    """
    number_mapping = {
        "один": "1",
        "два": "2",
        "три": "3",
    }

    # Only digits survive the final filter, so lower-casing cannot leak into
    # the output; it merely lets capitalised numerals hit the mapping keys.
    digits_text = input_string.lower()
    for word, number in number_mapping.items():
        digits_text = digits_text.replace(word, number)

    return re.sub(r'[^\d]+', '', digits_text)
|
||||||
|
|
||||||
|
@ -1,21 +0,0 @@
|
|||||||
import os
|
|
||||||
import torch
|
|
||||||
from dotenv import load_dotenv
|
|
||||||
|
|
||||||
load_dotenv()
|
|
||||||
|
|
||||||
def env_int(name, default):
    """Return environment variable *name* as an int.

    Falls back to *default* when the variable is unset or empty. A value
    that is set but not a valid integer raises ValueError at import time.
    """
    raw = os.getenv(name)
    return int(raw) if raw else default


HARPYIA_MODEL = os.getenv('HARPYIA_MODEL') or 'small'
HARPYIA_LANGUAGE = os.getenv('HARPYIA_LANGUAGE') or 'ru'
# NOTE(review): 160000 looks like a typo for 16000 — confirm the sample rate
# the recognizers expect before relying on this default.
HARPYIA_SAMPLE_RATE = env_int('HARPYIA_SAMPLE_RATE', 160000)

# os.getenv() returns strings, so numeric settings are converted explicitly;
# otherwise they would be str when configured and int when defaulted.
WHISPER_NUM_WORKERS = env_int('WHISPER_NUM_WORKERS', 6)
WHISPER_CPU_THREADS = env_int('WHISPER_CPU_THREADS', 10)
WHISPER_BEAM_SIZE = env_int('WHISPER_BEAM_SIZE', 5)

SOS_PROMPT = os.getenv('SOS_PROMPT') or 'спасите помогите помощь пожар караул кирилл'
NUMBER_PROMPT = os.getenv('NUMBER_PROMPT') or 'один два три четыре пять шесть семь восемь девять десять одинадцать двенадцать тринадцать сто сот'

RAT_URL = os.getenv('RAT_URL') or 'localhost:8081'

# Check if NVIDIA GPU is available
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
|
@ -1 +0,0 @@
|
|||||||
from message.message_service import MessageService
|
|
@ -1,24 +0,0 @@
|
|||||||
import sys
|
|
||||||
from message.strategies import BaseMessageStrategy
|
|
||||||
|
|
||||||
class MessageService:
    """Thin facade that delegates message handling to a pluggable strategy.

    The strategy supplies the recognition prompt, converts a recognition
    result into a message (`transfer`) and delivers it (`send`).
    """

    def __init__(self, strategy: "BaseMessageStrategy") -> None:
        """Store the strategy every call is delegated to."""
        self._strategy = strategy

    def get_prompt(self) -> str:
        """Return the strategy's prompt.

        The value has to be returned explicitly; without the `return` this
        method always evaluated to None.
        """
        return self._strategy.get_prompt()

    def transfer(self, text: str) -> any:
        """Convert a recognition result into a message via the strategy."""
        return self._strategy.transfer(text)

    def send(self, message: str) -> any:
        """Deliver *message* via the strategy and return whatever it returns."""
        return self._strategy.send(message)

    def transfer_and_send(self, recognized_result: any) -> any:
        """Convert *recognized_result* and send the message if it is truthy.

        The raw recognition result is logged to stderr in every case.

        Returns:
            The converted message, whether or not it was sent.
        """
        message = self.transfer(recognized_result)

        if message:
            self.send(message)

        print('Sending message:', recognized_result, file=sys.stderr)
        return message
|
|
@ -1,3 +0,0 @@
|
|||||||
from message.strategies.base_message_strategy import BaseMessageStrategy
|
|
||||||
from message.strategies.sos_message_strategy import SosMessageStrategy
|
|
||||||
from message.strategies.number_message_strategy import NumberMessageStrategy
|
|
@ -1,14 +0,0 @@
|
|||||||
from abc import ABC, abstractmethod
|
|
||||||
|
|
||||||
class BaseMessageStrategy(ABC):
    """Interface for strategies that turn recognition results into messages."""

    @abstractmethod
    def get_prompt(self) -> str:
        """Return the prompt associated with this strategy.

        Declared with `self` like the other abstract methods, so the
        signature matches how implementations are actually called.
        """

    @abstractmethod
    def transfer(self, text: str) -> any:
        """Convert a recognition result into a message."""

    @abstractmethod
    def send(self, message: str) -> any:
        """Deliver a message produced by `transfer`."""
|
|
@ -1,31 +0,0 @@
|
|||||||
import re
|
|
||||||
|
|
||||||
import config
|
|
||||||
from message.strategies import BaseMessageStrategy
|
|
||||||
|
|
||||||
class NumberMessageStrategy(BaseMessageStrategy):
    """Message strategy that reduces recognised speech to a string of digits."""

    def __init__(self, prompt=config.NUMBER_PROMPT) -> None:
        """Remember the prompt handed out by `get_prompt`."""
        self._prompt = prompt

    def get_prompt(self):
        """Return the prompt configured for number recognition."""
        return self._prompt

    def transfer(self, recognized_result: any) -> dict:
        """Convert the 'text' entry of *recognized_result* into digits.

        Returns:
            A dict of the form {'recognized': <digits>}.
        """
        return self._transfer_and_clean(recognized_result['text'])

    def _transfer_and_clean(self, text: str) -> dict:
        """Map spelled-out numerals to digits and strip all other characters."""
        number_mapping = {
            "один": "1",
            "два": "2",
            "три": "3",
        }

        # Accumulate on a single variable: restarting from `text` on every
        # pass kept only the last mapping. Lower-casing lets capitalised
        # numerals match; only digits survive the filter below, so the case
        # change never reaches the output.
        transfered_text = text.lower()
        for word, number in number_mapping.items():
            transfered_text = transfered_text.replace(word, number)

        transfered_text = re.sub(r'[^\d]+', '', transfered_text)

        return {'recognized': transfered_text}

    def send(self, message: str) -> None:
        """Do nothing: this strategy does not deliver messages."""
        pass
|
|
@ -1,35 +0,0 @@
|
|||||||
from typing import List
|
|
||||||
import requests
|
|
||||||
|
|
||||||
import config
|
|
||||||
from message.strategies import BaseMessageStrategy
|
|
||||||
|
|
||||||
MESSAGE_ENDPOINT = '/message'
|
|
||||||
|
|
||||||
class SosMessageStrategy(BaseMessageStrategy):
    """Message strategy that reports which prompt keywords occur in a transcript."""

    def __init__(self, prompt=config.SOS_PROMPT, url=config.RAT_URL) -> None:
        """Store the whitespace-separated keyword prompt and the target URL."""
        self._prompt = prompt
        self._url = url

    def get_prompt(self):
        """Return the keyword prompt."""
        return self._prompt

    def transfer(self, recognized_result: any) -> dict:
        """Build the message for a recognition result.

        Returns:
            A dict with the transcript ('transcript'), the prompt keywords
            found in it ('results') and the 'segments' entry passed through
            unchanged.
        """
        transcript = recognized_result['text']
        return {
            'transcript': transcript,
            'results': self._filter_words_with_prompt(transcript),
            'segments': recognized_result['segments'],
        }

    def _filter_words_with_prompt(self, text: str) -> list:
        """Return the prompt keywords that occur in *text*, ignoring case.

        `split()` without an argument discards empty tokens; with
        `split(' ')` an empty prompt or a doubled space produced '' which is
        a substring of every text and was reported as a match.
        """
        lowered = text.lower()  # computed once instead of once per keyword
        return [word for word in self._prompt.split() if word.lower() in lowered]

    def send(self, message) -> any:
        """Do nothing for now: delivery is disabled.

        TODO: the intended delivery was a POST of {'message': message} to
        self._url + MESSAGE_ENDPOINT.
        """
        pass
|
|
@ -1 +0,0 @@
|
|||||||
from queue_stack.queue_stack import QueueStack
|
|
@ -1,51 +0,0 @@
|
|||||||
import sys
|
|
||||||
|
|
||||||
from threading import Thread, Event, Lock
|
|
||||||
|
|
||||||
from queue_stack.strategies import BaseProcessStrategy
|
|
||||||
|
|
||||||
class QueueStack:
    """FIFO job queue drained by a single worker loop.

    Jobs are argument tuples passed to the strategy's `process` method one
    at a time, in submission order. Callers either fire and forget
    (`append`) or block until their own job has finished
    (`append_and_await`).
    """

    def __init__(self, strategy: "BaseProcessStrategy") -> None:
        """Create an empty queue that processes jobs with *strategy*."""
        self._stack = []
        self._strategy = strategy

        self._lock = Lock()
        self._running = False
        # Set whenever there may be work (or a stop request), so the worker
        # can sleep instead of spinning on an empty queue.
        self._wakeup = Event()

        # Kept for backward compatibility: result of the most recent job.
        self._last_response = None

    def append(self, args, event=None) -> None:
        """Queue *args* for processing; *event*, if given, is set when done."""
        self._enqueue(args, event, None)

    def append_and_await(self, args) -> any:
        """Queue *args*, block until that job finished and return its result.

        Each call gets its own result slot, so concurrent callers can no
        longer receive each other's response through a shared attribute.

        Raises:
            Exception: whatever the strategy raised while processing this job.
        """
        event = Event()
        outcome = {}
        self._enqueue(args, event, outcome)

        event.wait()

        if 'error' in outcome:
            raise outcome['error']
        return outcome['response']

    def loop(self) -> None:
        """Process queued jobs until `stop_loop` is called."""
        self._running = True

        while self._running:
            self._wakeup.wait()

            # Hold the lock only while touching the queue, never while the
            # (potentially slow) job runs, so producers are not blocked.
            with self._lock:
                if not self._stack:
                    self._wakeup.clear()
                    continue
                print('Stack length:', len(self._stack), file=sys.stderr)
                args, event, outcome = self._stack.pop(0)

            try:
                response = self._process(*args)
            except Exception as error:
                # A failing job must not kill the worker or leave its
                # waiter blocked forever; hand the error to the waiter.
                print('Job failed:', repr(error), file=sys.stderr)
                if outcome is not None:
                    outcome['error'] = error
            else:
                self._last_response = response
                if outcome is not None:
                    outcome['response'] = response
            finally:
                if event is not None:
                    event.set()

    def _enqueue(self, args, event, outcome) -> None:
        """Add one job with its optional completion event and result slot."""
        with self._lock:
            self._stack.append((args, event, outcome))
            self._wakeup.set()

    def _process(self, *args, **kwargs) -> any:
        """Run a single job through the strategy."""
        return self._strategy.process(*args, **kwargs)

    def start_loop_in_thread(self) -> None:
        """Run `loop` in a background thread.

        The thread is a daemon so that it does not keep the interpreter
        alive when `stop_loop` is never called.
        """
        thread = Thread(target=self.loop, daemon=True)
        thread.start()

    def stop_loop(self) -> None:
        """Ask the worker loop to exit once its current job is done."""
        self._running = False
        self._wakeup.set()  # wake the worker so it notices the stop request
|
|
@ -1,2 +0,0 @@
|
|||||||
from queue_stack.strategies.base_process_strategy import BaseProcessStrategy
|
|
||||||
from queue_stack.strategies.recognize_and_send_strategy import RecognizeAndSendStrategy
|
|
@ -1,6 +0,0 @@
|
|||||||
from abc import ABC, abstractmethod
|
|
||||||
|
|
||||||
class BaseProcessStrategy(ABC):
    """Interface for objects that process one queued job."""

    @abstractmethod
    def process(self, *args, **kwargs) -> any:
        """Handle a single job described by the given arguments."""
|
|
@ -1,14 +0,0 @@
|
|||||||
import sys
|
|
||||||
|
|
||||||
from queue_stack.strategies import BaseProcessStrategy
|
|
||||||
from message import MessageService
|
|
||||||
from recognizer import Recognizer
|
|
||||||
|
|
||||||
class RecognizeAndSendStrategy(BaseProcessStrategy):
    """Job strategy: recognise a file, then hand the result to a message service."""

    def process(self, file, recognizer: Recognizer, message_service: MessageService, language, prompt) -> any:
        """Recognise *file* and pass the result to *message_service*.

        The value returned by `transfer_and_send` is logged to stderr and
        returned to the caller.
        """
        recognized = recognizer.recognize(file, language=language, prompt=prompt)
        outgoing = message_service.transfer_and_send(recognized)
        print(outgoing, file=sys.stderr)
        return outgoing
|
|
@ -1 +0,0 @@
|
|||||||
from recognizer.recognizer import Recognizer
|
|
@ -1,14 +0,0 @@
|
|||||||
import sys
|
|
||||||
|
|
||||||
import config
|
|
||||||
from recognizer.strategies import BaseRecognizerStrategy
|
|
||||||
|
|
||||||
class Recognizer:
    """Facade that delegates speech recognition to a pluggable strategy."""

    def __init__(self, strategy: "BaseRecognizerStrategy") -> None:
        """Store the strategy that performs the recognition."""
        self._strategy = strategy

    def recognize(self, file, language, prompt) -> any:
        """Recognise *file* with the strategy, log the result and return it.

        The result is returned exactly as the strategy produced it. It is
        not necessarily a string (the strategy interface is annotated as
        returning `any`), so the annotation no longer claims `str`.
        """
        result = self._strategy.recognize(file, language=language, prompt=prompt)

        print(f'Result: {result}', file=sys.stderr)
        return result
|
|
@ -1,3 +0,0 @@
|
|||||||
from recognizer.strategies.base_recognizer_strategy import BaseRecognizerStrategy
|
|
||||||
from recognizer.strategies.whisper_strategy import WhisperStrategy
|
|
||||||
from recognizer.strategies.fast_whisper_strategy import FastWhisperStrategy
|
|
@ -1,6 +0,0 @@
|
|||||||
from abc import ABC, abstractmethod
|
|
||||||
|
|
||||||
class BaseRecognizerStrategy(ABC):
    """Interface for speech-recognition back ends."""

    @abstractmethod
    def recognize(self, file, language, prompt) -> any:
        """Recognise speech in *file* using the given language and prompt."""
|
|
@ -1,59 +0,0 @@
|
|||||||
import sys
|
|
||||||
|
|
||||||
import whisper
|
|
||||||
from faster_whisper import WhisperModel
|
|
||||||
|
|
||||||
import config
|
|
||||||
from recognizer.strategies import BaseRecognizerStrategy
|
|
||||||
|
|
||||||
class FastWhisperStrategy(BaseRecognizerStrategy):
    """Recognition strategy backed by a faster-whisper `WhisperModel`."""

    def __init__(self) -> None:
        """Load the model described by the `config` module."""
        self._model = WhisperModel(
            model_size_or_path=config.HARPYIA_MODEL,
            device=config.DEVICE,
            # Settings taken from the environment arrive as strings.
            num_workers=int(config.WHISPER_NUM_WORKERS),
            cpu_threads=int(config.WHISPER_CPU_THREADS),
        )

    def recognize(self, file, language, prompt) -> any:
        """Transcribe the audio stored at `file.name`.

        Returns:
            A dict with the joined transcript under 'text' and a placeholder
            'segments' mapping whose values are all None.
        """
        audio = self._prepare_file(file.name)
        return self._transcribe(audio, language, prompt)

    def _prepare_file(self, filename: str):
        """Load the audio file and pad or trim it with whisper's helpers."""
        # NOTE(review): config.HARPYIA_SAMPLE_RATE defaults to 160000, which
        # looks like a typo for 16000 — confirm the rate the model expects.
        audio = whisper.load_audio(filename, sr=config.HARPYIA_SAMPLE_RATE)
        audio = whisper.pad_or_trim(audio)
        return audio

    def _transcribe(self, audio, language, prompt):
        """Run the model on *audio* and collect the segment texts."""
        segments, _ = self._model.transcribe(
            audio,
            language=language,
            initial_prompt=prompt,
            condition_on_previous_text=False,
            vad_filter=True,
            beam_size=int(config.WHISPER_BEAM_SIZE),
        )

        # Materialise the segments once. They were previously iterated for
        # logging and then iterated again to build the text; a lazy iterable
        # is exhausted by the first pass, leaving the transcript empty.
        segments = list(segments)

        print('Segments:', file=sys.stderr)
        for segment in segments:
            print(segment, file=sys.stderr)

        return {
            'text': ' '.join(segment.text for segment in segments),
            # Placeholder with the same keys as before; per-segment details
            # are not filled in.
            'segments': {
                'id': None,
                'seek': None,
                'start': None,
                'end': None,
                'text': None,
                'tokens': None,
                'temperature': None,
                'avg_logprob': None,
                'compression_ratio': None,
                'no_speech_prob': None,
            },
        }
|
|
@ -1,12 +0,0 @@
|
|||||||
import whisper
|
|
||||||
|
|
||||||
import config
|
|
||||||
from recognizer.strategies import BaseRecognizerStrategy
|
|
||||||
|
|
||||||
class WhisperStrategy(BaseRecognizerStrategy):
    """Recognition strategy backed by the reference `whisper` package."""

    def __init__(self) -> None:
        """Load the configured model onto the configured device."""
        self._model = whisper.load_model(config.HARPYIA_MODEL, device=config.DEVICE)

    def recognize(self, file, language, prompt) -> any:
        """Transcribe the audio at `file.name` and return the model's result."""
        options = {'language': language, 'initial_prompt': prompt}
        return self._model.transcribe(file.name, **options)
|
|
Loading…
x
Reference in New Issue
Block a user