Implemented new architecture
Created a message service responsible for searching for prompts inside the recognized text and sending them to the client. Created a recognizer with two strategies: Whisper and Dany's faster-whisper. Implemented a file stack that works in a separate thread, sends each file to the recognizer, and then sends the resulting message to the client (Rat, for example).
This commit is contained in:
parent
cb5f504d31
commit
e89122cb76
25
Dockerfile
25
Dockerfile
@ -1,20 +1,23 @@
|
|||||||
FROM python:3.10-slim
|
FROM python:3.10-slim
|
||||||
|
|
||||||
|
ENV FLASK_APP=src/app.py
|
||||||
|
ARG PIP_REQ_FILE=requirements.txt
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
COPY requirements.txt /app
|
RUN apt update && apt install git ffmpeg -y && \
|
||||||
RUN apt-get update && apt-get install git -y
|
pip3 install "git+https://github.com/openai/whisper.git"
|
||||||
RUN pip3 install -r requirements.txt
|
|
||||||
RUN pip3 install "git+https://github.com/openai/whisper.git"
|
|
||||||
RUN apt-get install -y ffmpeg
|
|
||||||
|
|
||||||
RUN whisper --model medium --language ru dummy.wav; exit 0
|
RUN whisper --model medium --language ru dummy.wav; exit 0 && \
|
||||||
RUN whisper --model small --language ru dummy.wav; exit 0
|
whisper --model small --language ru dummy.wav; exit 0
|
||||||
|
|
||||||
COPY . .
|
COPY src/ src/
|
||||||
|
|
||||||
|
# Separate requirements installation to keep other dependencies
|
||||||
|
# in cache
|
||||||
|
COPY ${PIP_REQ_FILE} ${PIP_REQ_FILE}
|
||||||
|
RUN pip3 install -r ${PIP_REQ_FILE}
|
||||||
|
|
||||||
EXPOSE 5000
|
EXPOSE 5000
|
||||||
|
|
||||||
ENV FLASK_APP=src/app.py
|
CMD [ "python3", "-m" , "flask", "run", "--host=0.0.0.0"]
|
||||||
|
|
||||||
CMD [ "python3", "-m" , "flask", "run", "--host=0.0.0.0"]
|
|
7
Makefile
Normal file
7
Makefile
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
# Launch the Flask app locally with CUDA debugging/allocator tuning enabled.
# Bug fix: PYTORCH_CUDA_ALLOC_CONF was assigned twice on the same command; the
# shell applies assignments left to right, so only the last value
# (max_split_size_mb:128) ever took effect — the duplicate is removed.
run:
	PYTORCH_NO_CUDA_MEMORY_CACHING=1 \
	PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128 \
	CUDA_LAUNCH_BLOCKING=1 \
	FLASK_APP=src/app.py \
	flask run --host=0.0.0.0
|
@ -1,8 +1,62 @@
|
|||||||
flask==3.0.2
|
asgiref==3.7.2
|
||||||
Jinja2==3.1.3
|
av==11.0.0
|
||||||
blinker==1.7.0
|
blinker==1.7.0
|
||||||
Werkzeug==3.0.1
|
certifi==2024.2.2
|
||||||
|
charset-normalizer==3.3.2
|
||||||
click==8.1.7
|
click==8.1.7
|
||||||
|
coloredlogs==15.0.1
|
||||||
|
ctranslate2==4.0.0
|
||||||
|
Cython==3.0.8
|
||||||
|
dtw-python==1.3.1
|
||||||
|
faster-whisper==1.0.0
|
||||||
|
filelock==3.13.1
|
||||||
|
Flask==3.0.2
|
||||||
|
flatbuffers==23.5.26
|
||||||
|
fsspec==2024.2.0
|
||||||
|
huggingface-hub==0.21.3
|
||||||
|
humanfriendly==10.0
|
||||||
|
idna==3.6
|
||||||
itsdangerous==2.1.2
|
itsdangerous==2.1.2
|
||||||
|
Jinja2==3.1.3
|
||||||
|
llvmlite==0.42.0
|
||||||
MarkupSafe==2.1.5
|
MarkupSafe==2.1.5
|
||||||
|
more-itertools==10.2.0
|
||||||
|
mpmath==1.3.0
|
||||||
|
networkx==3.2.1
|
||||||
|
numba==0.59.0
|
||||||
|
numpy==1.26.4
|
||||||
|
nvidia-cublas-cu12==12.1.3.1
|
||||||
|
nvidia-cuda-cupti-cu12==12.1.105
|
||||||
|
nvidia-cuda-nvrtc-cu12==12.1.105
|
||||||
|
nvidia-cuda-runtime-cu12==12.1.105
|
||||||
|
nvidia-cudnn-cu12==8.9.2.26
|
||||||
|
nvidia-cufft-cu12==11.0.2.54
|
||||||
|
nvidia-curand-cu12==10.3.2.106
|
||||||
|
nvidia-cusolver-cu12==11.4.5.107
|
||||||
|
nvidia-cusparse-cu12==12.1.0.106
|
||||||
|
nvidia-nccl-cu12==2.19.3
|
||||||
|
nvidia-nvjitlink-cu12==12.3.101
|
||||||
|
nvidia-nvtx-cu12==12.1.105
|
||||||
|
onnxruntime==1.17.1
|
||||||
|
openai-whisper @ git+https://github.com/openai/whisper.git@ba3f3cd54b0e5b8ce1ab3de13e32122d0d5f98ab
|
||||||
|
packaging==23.2
|
||||||
|
pillow==10.2.0
|
||||||
|
protobuf==4.25.3
|
||||||
python-dotenv==1.0.1
|
python-dotenv==1.0.1
|
||||||
|
PyYAML==6.0.1
|
||||||
|
regex==2023.12.25
|
||||||
|
requests==2.31.0
|
||||||
|
scipy==1.12.0
|
||||||
|
six==1.16.0
|
||||||
|
sympy==1.12
|
||||||
|
tiktoken==0.6.0
|
||||||
|
tokenizers==0.15.2
|
||||||
|
torch==2.2.1
|
||||||
|
torchaudio==2.2.1
|
||||||
|
torchvision==0.17.1
|
||||||
|
tqdm==4.66.2
|
||||||
|
triton==2.2.0
|
||||||
|
typing_extensions==4.10.0
|
||||||
|
urllib3==2.2.1
|
||||||
|
Werkzeug==3.0.1
|
||||||
|
whisper-timestamped==1.15.0
|
@ -32,6 +32,7 @@ def recognize_files(handler_fn):
|
|||||||
results = []
|
results = []
|
||||||
|
|
||||||
for filename, handle in request.files.items():
|
for filename, handle in request.files.items():
|
||||||
|
#
|
||||||
temp = NamedTemporaryFile()
|
temp = NamedTemporaryFile()
|
||||||
handle.save(temp)
|
handle.save(temp)
|
||||||
result = model.transcribe(temp.name, language=HARPYIA_LANGUAGE, initial_prompt=HARPYIA_PROMPT)
|
result = model.transcribe(temp.name, language=HARPYIA_LANGUAGE, initial_prompt=HARPYIA_PROMPT)
|
||||||
|
12
src/config.py
Normal file
12
src/config.py
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
import os

import torch
from dotenv import load_dotenv

# Load HARPYIA_* overrides from a local .env file, if one exists.
load_dotenv()

# NOTE: `or` means an *empty* env var also falls back to the default,
# not just an unset one.
# Default prompt words are Russian distress words ("save / help / to the rescue / fire").
HARPYIA_PROMPT = os.getenv('HARPYIA_PROMPT') or 'спасите помогите на помощь пожар'
# Whisper model size to load (e.g. 'small', 'medium').
HARPYIA_MODEL = os.getenv('HARPYIA_MODEL') or 'medium'
# Language code passed to the recognizer.
HARPYIA_LANGUAGE = os.getenv('HARPYIA_LANGUAGE') or 'ru'

# Check if NVIDIA GPU is available; fall back to CPU inference otherwise.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
32
src/file_stack.py
Normal file
32
src/file_stack.py
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
from threading import Thread
|
||||||
|
|
||||||
|
from recognizer import Recognizer
|
||||||
|
from message import MessageComposer
|
||||||
|
|
||||||
|
class WavStack:
    """FIFO queue of audio files processed on a background thread.

    Each queued file is transcribed by the recognizer; the resulting text is
    composed into a message, which is sent only when it contains a prompt word.
    """

    def __init__(self, recognizer: "Recognizer", message_composer: "MessageComposer"):
        self._stack = []
        self._recognizer = recognizer
        self._message_composer = message_composer
        self._running = False

    def append(self, file):
        """Queue an audio file for recognition."""
        # NOTE(review): list.append/pop are GIL-atomic, which is presumably why
        # no explicit lock is used — confirm if multiple producers are expected.
        self._stack.append(file)

    def loop(self):
        """Process queued files until stop_loop() is called.

        Bug fix: the original busy-spun at 100% CPU while the queue was empty;
        we now sleep briefly between polls when there is no work.
        """
        from time import sleep  # local import: keeps the module's import surface unchanged

        self._running = True
        while self._running:
            if self._stack:
                file = self._stack.pop(0)  # FIFO order
                recognized_text = self._recognizer.recognize(file)
                message = self._message_composer.compose(recognized_text)

                # Only forward messages that actually contain a prompt word.
                if message.has_prompt():
                    message.send()
            else:
                sleep(0.05)  # idle: avoid burning CPU while waiting for work

    def start_loop_in_thread(self):
        """Start loop() on a new thread and return the started Thread.

        Returning the thread (the original returned None) lets callers join it;
        existing callers that ignore the return value are unaffected.
        """
        thread = Thread(target=self.loop)
        thread.start()
        return thread

    def stop_loop(self):
        """Ask the processing loop to exit after the current iteration."""
        self._running = False
|
6
src/message/__init__.py
Normal file
6
src/message/__init__.py
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
from message.prompt_service import PromptService
|
||||||
|
from message.message_sender.message_sender import MessageSender
|
||||||
|
from message.message_composer import MessageComposer
|
||||||
|
from message.message import Message
|
||||||
|
|
||||||
|
import message.message_sender as message_sender
|
17
src/message/message.py
Normal file
17
src/message/message.py
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
from message import PromptService, MessageSender
|
||||||
|
|
||||||
|
class Message:
    """A recognized-text payload that can be checked for prompts and delivered."""

    def __init__(self, prompt_service: PromptService, message_sender: MessageSender,
                 recognized_text: str):
        self._recognized_text = recognized_text
        self._message_sender = message_sender
        self._prompt_service = prompt_service

    def has_prompt(self) -> bool:
        """Return True when the recognized text contains a prompt word."""
        return self._prompt_service.has_prompt(self._recognized_text)

    def send(self):
        """Deliver the filtered response through the configured sender."""
        response = self._generate_response()
        self._message_sender.send(response)

    def _generate_response(self) -> str:
        # Keep only the prompt words out of the full recognized text.
        return self._prompt_service.filter_words_with_prompt(self._recognized_text)
|
9
src/message/message_composer.py
Normal file
9
src/message/message_composer.py
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
from message import Message, PromptService, MessageSender
|
||||||
|
|
||||||
|
class MessageComposer:
    """Factory that wires Message objects with the prompt service and sender."""

    def __init__(self, prompt_service: PromptService, message_sender: MessageSender):
        self._message_sender = message_sender
        self._prompt_service = prompt_service

    def compose(self, recognized_text) -> Message:
        """Wrap *recognized_text* in a Message ready to be checked and sent."""
        message = Message(self._prompt_service, self._message_sender, recognized_text)
        return message
|
3
src/message/message_sender/__init__.py
Normal file
3
src/message/message_sender/__init__.py
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
from message.message_sender.message_sender import MessageSender
|
||||||
|
from message.message_sender.message_sender_strategy import MessageSenderStrategy
|
||||||
|
from message.message_sender.rat_strategy import RatStrategy
|
8
src/message/message_sender/message_sender.py
Normal file
8
src/message/message_sender/message_sender.py
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
from message.message_sender import MessageSenderStrategy
|
||||||
|
|
||||||
|
class MessageSender:
    """Thin facade that delegates message delivery to a pluggable strategy."""

    def __init__(self, strategy: MessageSenderStrategy):
        # Concrete transport (e.g. RatStrategy) chosen at wiring time.
        self._strategy = strategy

    def send(self, message):
        """Deliver *message* via the configured strategy."""
        self._strategy.send(message)
|
6
src/message/message_sender/message_sender_strategy.py
Normal file
6
src/message/message_sender/message_sender_strategy.py
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
from abc import ABC, abstractmethod
|
||||||
|
|
||||||
|
class MessageSenderStrategy(ABC):
    """Contract for message-delivery transports (e.g. HTTP to a Rat client)."""

    @abstractmethod
    def send(self, message):
        """Deliver *message* to the target client."""
|
12
src/message/message_sender/rat_strategy.py
Normal file
12
src/message/message_sender/rat_strategy.py
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
import requests
|
||||||
|
|
||||||
|
from message.message_sender import MessageSenderStrategy
|
||||||
|
|
||||||
|
# Path on the Rat service that accepts recognized messages.
MESSAGE_ENDPOINT = '/message'


class RatStrategy(MessageSenderStrategy):
    """Delivers messages to a Rat client via HTTP POST."""

    def __init__(self, url, timeout=10):
        # Base URL of the Rat service, e.g. 'http://rat:5000'.
        self._url = url
        # Bug fix: requests.post without a timeout can block the sending
        # thread forever if the Rat service is unreachable; default keeps
        # existing callers working while bounding the wait.
        self._timeout = timeout

    def send(self, message):
        """POST *message* as JSON to the Rat /message endpoint."""
        requests.post(self._url + MESSAGE_ENDPOINT,
                      json={'message': message},
                      timeout=self._timeout)
|
16
src/message/prompt_service.py
Normal file
16
src/message/prompt_service.py
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
class PromptService:
    """Detects configured prompt words inside recognized text.

    The prompt is a single space-separated string of trigger words
    (e.g. 'спасите помогите на помощь пожар').
    """

    def __init__(self, prompt):
        self._prompt = prompt

    def has_prompt(self, text: str) -> bool:
        """Return True if any word of *text* is one of the prompt words.

        Bug fix: the original returned inside the loop, so only the FIRST
        word of *text* was ever checked against the prompt.
        """
        prompt_words = set(self._prompt.split(' '))  # build once, O(1) lookups
        return any(part in prompt_words for part in text.split(' '))

    def filter_words_with_prompt(self, text: str) -> str:
        """Return the prompt words found in *text*, joined by single spaces.

        Bug fix: the original returned a list despite the declared ``-> str``;
        the result is now a space-joined string, matching the annotation and
        the JSON 'message' payload the sender expects.
        """
        prompt_words = set(self._prompt.split(' '))
        return ' '.join(part for part in text.split(' ') if part in prompt_words)
|
4
src/recognizer/__init__.py
Normal file
4
src/recognizer/__init__.py
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
from recognizer.recognizer import Recognizer
|
||||||
|
from recognizer.recognizer_strategy import RecognizerStrategy
|
||||||
|
from recognizer.whisper_strategy import WhisperStrategy
|
||||||
|
from recognizer.fast_whisper_strategy import FastWhisperStrategy
|
5
src/recognizer/fast_whisper_strategy.py
Normal file
5
src/recognizer/fast_whisper_strategy.py
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
from recognizer import RecognizerStrategy
|
||||||
|
|
||||||
|
class FastWhisperStrategy(RecognizerStrategy):
    # NOTE(review): stub — always returns an empty transcript. Presumably the
    # real faster-whisper call is added in a later commit; TODO confirm.
    def recognize(self, file) -> str:
        """Transcribe *file*; currently a placeholder returning ''."""
        return ''
|
8
src/recognizer/recognizer.py
Normal file
8
src/recognizer/recognizer.py
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
from recognizer import RecognizerStrategy
|
||||||
|
|
||||||
|
class Recognizer:
    """Facade that delegates transcription to a pluggable strategy."""

    # Annotation is a string (lazy) so the class definition does not depend on
    # import order; behavior for callers is unchanged.
    def __init__(self, strategy: "RecognizerStrategy"):
        self._strategy = strategy

    def recognize(self, file) -> str:
        """Transcribe *file* with the configured strategy and return the text.

        Bug fix: the original dropped the strategy's result and implicitly
        returned None, so downstream prompt matching never saw any text.
        """
        return self._strategy.recognize(file)
|
6
src/recognizer/recognizer_strategy.py
Normal file
6
src/recognizer/recognizer_strategy.py
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
from abc import ABC, abstractmethod
|
||||||
|
|
||||||
|
class RecognizerStrategy(ABC):
    """Contract for speech-to-text backends (Whisper, faster-whisper, ...)."""

    @abstractmethod
    def recognize(self, file) -> str:
        """Transcribe the given audio file and return the recognized text."""
|
5
src/recognizer/whisper_strategy.py
Normal file
5
src/recognizer/whisper_strategy.py
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
from recognizer import RecognizerStrategy
|
||||||
|
|
||||||
|
class WhisperStrategy(RecognizerStrategy):
    # NOTE(review): stub — always returns an empty transcript. Presumably the
    # real openai-whisper transcription call is added later; TODO confirm.
    def recognize(self, file) -> str:
        """Transcribe *file*; currently a placeholder returning ''."""
        return ''
|
Loading…
x
Reference in New Issue
Block a user