Implemented new architecture

Created a message service responsible for searching for the prompt words inside the recognized text and sending the result to the client.

Created a recognizer with two strategies: whisper and Dany's fast whisper (faster-whisper).

Implemented a file stack that runs in a separate thread: it sends each queued file to the recognizer and then sends the resulting message to the client (Rat, for example).
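
A minimal sketch of how the pieces introduced below compose, assuming the src/ modules are on the import path; the Rat URL is a made-up example:

    from config import HARPYIA_PROMPT
    from message import PromptService, MessageComposer, MessageSender
    from message.message_sender import RatStrategy
    from recognizer import Recognizer, WhisperStrategy
    from file_stack import WavStack

    # 'http://localhost:5001' is hypothetical; point it at the real Rat client
    sender = MessageSender(RatStrategy('http://localhost:5001'))
    composer = MessageComposer(PromptService(HARPYIA_PROMPT), sender)
    stack = WavStack(Recognizer(WhisperStrategy()), composer)
    stack.start_loop_in_thread()
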
Sviatoslav Tsariov Yurievich 2024-03-19 19:01:36 +03:00
parent cb5f504d31
commit e89122cb76
19 changed files with 228 additions and 14 deletions

Dockerfile

@@ -1,20 +1,23 @@
 FROM python:3.10-slim
+ENV FLASK_APP=src/app.py
+ARG PIP_REQ_FILE=requirements.txt
 WORKDIR /app
-COPY requirements.txt /app
-RUN apt-get update && apt-get install git -y
-RUN pip3 install -r requirements.txt
-RUN pip3 install "git+https://github.com/openai/whisper.git"
-RUN apt-get install -y ffmpeg
+RUN apt update && apt install git ffmpeg -y && \
+    pip3 install "git+https://github.com/openai/whisper.git"
-RUN whisper --model medium --language ru dummy.wav; exit 0
-RUN whisper --model small --language ru dummy.wav; exit 0
+RUN whisper --model medium --language ru dummy.wav; exit 0 && \
+    whisper --model small --language ru dummy.wav; exit 0
-COPY . .
+COPY src/ src/
+# Separate requirements installation to keep other dependencies
+# in cache
+COPY ${PIP_REQ_FILE} ${PIP_REQ_FILE}
+RUN pip3 install -r ${PIP_REQ_FILE}
 EXPOSE 5000
-ENV FLASK_APP=src/app.py
-CMD [ "python3", "-m" , "flask", "run", "--host=0.0.0.0"]
+CMD [ "python3", "-m" , "flask", "run", "--host=0.0.0.0"]

Makefile (new file, 7 lines)

@@ -0,0 +1,7 @@
run:
	PYTORCH_NO_CUDA_MEMORY_CACHING=1 \
	PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True,max_split_size_mb:128 \
	CUDA_LAUNCH_BLOCKING=1 \
	FLASK_APP=src/app.py \
	flask run --host=0.0.0.0

requirements.txt

@@ -1,8 +1,62 @@
-flask==3.0.2
-Jinja2==3.1.3
+asgiref==3.7.2
+av==11.0.0
+blinker==1.7.0
-Werkzeug==3.0.1
+certifi==2024.2.2
+charset-normalizer==3.3.2
+click==8.1.7
+coloredlogs==15.0.1
+ctranslate2==4.0.0
+Cython==3.0.8
+dtw-python==1.3.1
+faster-whisper==1.0.0
+filelock==3.13.1
+Flask==3.0.2
+flatbuffers==23.5.26
+fsspec==2024.2.0
+huggingface-hub==0.21.3
+humanfriendly==10.0
+idna==3.6
+itsdangerous==2.1.2
+Jinja2==3.1.3
+llvmlite==0.42.0
+MarkupSafe==2.1.5
+more-itertools==10.2.0
+mpmath==1.3.0
+networkx==3.2.1
+numba==0.59.0
+numpy==1.26.4
+nvidia-cublas-cu12==12.1.3.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-cuda-nvrtc-cu12==12.1.105
+nvidia-cuda-runtime-cu12==12.1.105
+nvidia-cudnn-cu12==8.9.2.26
+nvidia-cufft-cu12==11.0.2.54
+nvidia-curand-cu12==10.3.2.106
+nvidia-cusolver-cu12==11.4.5.107
+nvidia-cusparse-cu12==12.1.0.106
+nvidia-nccl-cu12==2.19.3
+nvidia-nvjitlink-cu12==12.3.101
+nvidia-nvtx-cu12==12.1.105
+onnxruntime==1.17.1
+openai-whisper @ git+https://github.com/openai/whisper.git@ba3f3cd54b0e5b8ce1ab3de13e32122d0d5f98ab
+packaging==23.2
+pillow==10.2.0
+protobuf==4.25.3
+python-dotenv==1.0.1
+PyYAML==6.0.1
+regex==2023.12.25
+requests==2.31.0
+scipy==1.12.0
+six==1.16.0
+sympy==1.12
+tiktoken==0.6.0
+tokenizers==0.15.2
+torch==2.2.1
+torchaudio==2.2.1
+torchvision==0.17.1
+tqdm==4.66.2
+triton==2.2.0
+typing_extensions==4.10.0
+urllib3==2.2.1
+Werkzeug==3.0.1
+whisper-timestamped==1.15.0

src/app.py

@@ -32,6 +32,7 @@ def recognize_files(handler_fn):
results = []
for filename, handle in request.files.items():
#
temp = NamedTemporaryFile()
handle.save(temp)
result = model.transcribe(temp.name, language=HARPYIA_LANGUAGE, initial_prompt=HARPYIA_PROMPT)

src/config.py (new file, 12 lines)

@@ -0,0 +1,12 @@
import os

import torch
from dotenv import load_dotenv

load_dotenv()

# Default prompt words (Russian): "save", "help", "to the rescue", "fire"
HARPYIA_PROMPT = os.getenv('HARPYIA_PROMPT') or 'спасите помогите на помощь пожар'
HARPYIA_MODEL = os.getenv('HARPYIA_MODEL') or 'medium'
HARPYIA_LANGUAGE = os.getenv('HARPYIA_LANGUAGE') or 'ru'

# Check if NVIDIA GPU is available
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
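
Each setting can be overridden through the environment or a .env file picked up by load_dotenv(); the values below are a hypothetical example:

    HARPYIA_MODEL=small
    HARPYIA_LANGUAGE=ru
    HARPYIA_PROMPT=спасите помогите на помощь пожар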

src/file_stack.py (new file, 32 lines)

@@ -0,0 +1,32 @@
import time
from threading import Thread

from recognizer import Recognizer
from message import MessageComposer


class WavStack:
    def __init__(self, recognizer: Recognizer, message_composer: MessageComposer):
        self._stack = []
        self._recognizer = recognizer
        self._message_composer = message_composer
        self._running = False

    def append(self, file):
        self._stack.append(file)

    def loop(self):
        self._running = True
        while self._running:
            if self._stack:
                # pop(0) makes this a FIFO queue: files are handled in arrival order
                file = self._stack.pop(0)
                recognized_text = self._recognizer.recognize(file)
                message = self._message_composer.compose(recognized_text)
                if message.has_prompt():
                    message.send()
            else:
                # avoid a busy-wait burning a CPU core while the queue is empty
                time.sleep(0.1)

    def start_loop_in_thread(self):
        thread = Thread(target=self.loop)
        thread.start()

    def stop_loop(self):
        self._running = False
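
The intended lifecycle, sketched under the assumption that recognizer and message_composer are built as in the commit message example; the wav path is made up:

    stack = WavStack(recognizer, message_composer)
    stack.start_loop_in_thread()      # recognition now runs off the caller's thread
    stack.append('/tmp/sample.wav')   # hypothetical file; the loop pops it FIFO
    stack.stop_loop()                 # worker finishes its current file and exits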

src/message/__init__.py (new file, 6 lines)

@@ -0,0 +1,6 @@
from message.prompt_service import PromptService
from message.message_sender.message_sender import MessageSender
# Message must be bound before MessageComposer, which imports it from this package
from message.message import Message
from message.message_composer import MessageComposer
import message.message_sender as message_sender

src/message/message.py (new file, 17 lines)

@@ -0,0 +1,17 @@
from message import PromptService, MessageSender


class Message:
    def __init__(self, prompt_service: PromptService, message_sender: MessageSender,
                 recognized_text: str):
        self._prompt_service = prompt_service
        self._message_sender = message_sender
        self._recognized_text = recognized_text

    def has_prompt(self) -> bool:
        return self._prompt_service.has_prompt(self._recognized_text)

    def send(self):
        self._message_sender.send(self._generate_response())

    def _generate_response(self) -> str:
        return self._prompt_service.filter_words_with_prompt(self._recognized_text)

src/message/message_composer.py

@@ -0,0 +1,9 @@
from message import Message, PromptService, MessageSender


class MessageComposer:
    def __init__(self, prompt_service: PromptService, message_sender: MessageSender):
        self._prompt_service = prompt_service
        self._message_sender = message_sender

    def compose(self, recognized_text) -> Message:
        return Message(self._prompt_service, self._message_sender, recognized_text)

src/message/message_sender/__init__.py

@@ -0,0 +1,3 @@
from message.message_sender.message_sender_strategy import MessageSenderStrategy
from message.message_sender.message_sender import MessageSender
from message.message_sender.rat_strategy import RatStrategy

src/message/message_sender/message_sender.py

@@ -0,0 +1,8 @@
from message.message_sender import MessageSenderStrategy


class MessageSender:
    def __init__(self, strategy: MessageSenderStrategy):
        self._strategy = strategy

    def send(self, message):
        self._strategy.send(message)

src/message/message_sender/message_sender_strategy.py

@@ -0,0 +1,6 @@
from abc import ABC, abstractmethod


class MessageSenderStrategy(ABC):
    @abstractmethod
    def send(self, message):
        pass

src/message/message_sender/rat_strategy.py

@@ -0,0 +1,12 @@
import requests

from message.message_sender import MessageSenderStrategy

MESSAGE_ENDPOINT = '/message'


class RatStrategy(MessageSenderStrategy):
    def __init__(self, url):
        self._url = url

    def send(self, message):
        # a timeout keeps an unreachable Rat client from blocking the recognition loop
        requests.post(self._url + MESSAGE_ENDPOINT, json={'message': message}, timeout=5)
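
Usage is plain composition; the URL is a made-up example:

    sender = MessageSender(RatStrategy('http://localhost:5001'))
    sender.send('пожар')  # POSTs {"message": "пожар"} to http://localhost:5001/message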

src/message/prompt_service.py

@@ -0,0 +1,16 @@
class PromptService:
    def __init__(self, prompt):
        self._prompt = prompt

    def has_prompt(self, text: str) -> bool:
        # True if any word of the text is one of the prompt words
        prompt_words = self._prompt.split(' ')
        return any(part in prompt_words for part in text.split(' '))

    def filter_words_with_prompt(self, text: str) -> str:
        # keep only the prompt words, joined back into a single string
        prompt_words = self._prompt.split(' ')
        words = [part for part in text.split(' ') if part in prompt_words]
        return ' '.join(words)
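
For example, with the default prompt from src/config.py (the sample utterance is made up):

    service = PromptService('спасите помогите на помощь пожар')
    service.has_prompt('в ангаре пожар')                # True: "пожар" is a prompt word
    service.filter_words_with_prompt('в ангаре пожар')  # 'пожар'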

src/recognizer/__init__.py

@@ -0,0 +1,4 @@
from recognizer.recognizer_strategy import RecognizerStrategy
from recognizer.recognizer import Recognizer
from recognizer.whisper_strategy import WhisperStrategy
from recognizer.fast_whisper_strategy import FastWhisperStrategy

src/recognizer/fast_whisper_strategy.py

@@ -0,0 +1,5 @@
from recognizer import RecognizerStrategy


class FastWhisperStrategy(RecognizerStrategy):
    def recognize(self, file) -> str:
        # stub: actual transcription is not implemented in this commit
        return ''
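
A sketch of how this strategy could sit on top of the faster-whisper package from requirements.txt; the model and device wiring is an assumption, not part of this commit:

    from faster_whisper import WhisperModel

    from recognizer import RecognizerStrategy
    from config import HARPYIA_MODEL, HARPYIA_LANGUAGE, HARPYIA_PROMPT, DEVICE


    class FastWhisperStrategy(RecognizerStrategy):
        def __init__(self):
            # int8 keeps memory low on CPU; float16 suits CUDA
            compute = 'float16' if DEVICE == 'cuda' else 'int8'
            self._model = WhisperModel(HARPYIA_MODEL, device=DEVICE, compute_type=compute)

        def recognize(self, file) -> str:
            segments, _info = self._model.transcribe(
                file, language=HARPYIA_LANGUAGE, initial_prompt=HARPYIA_PROMPT)
            # transcribe returns a lazy generator of segments; join their text
            return ' '.join(segment.text.strip() for segment in segments)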

src/recognizer/recognizer.py

@@ -0,0 +1,8 @@
from recognizer import RecognizerStrategy


class Recognizer:
    def __init__(self, strategy: RecognizerStrategy):
        self._strategy = strategy

    def recognize(self, file) -> str:
        return self._strategy.recognize(file)

src/recognizer/recognizer_strategy.py

@@ -0,0 +1,6 @@
from abc import ABC, abstractmethod


class RecognizerStrategy(ABC):
    @abstractmethod
    def recognize(self, file) -> str:
        pass

src/recognizer/whisper_strategy.py

@@ -0,0 +1,5 @@
from recognizer import RecognizerStrategy


class WhisperStrategy(RecognizerStrategy):
    def recognize(self, file) -> str:
        # stub: actual transcription is not implemented in this commit
        return ''
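
A sketch of the eventual implementation on top of openai-whisper; the transcribe call mirrors the one in src/app.py, while the one-time model loading is an assumption:

    import whisper

    from recognizer import RecognizerStrategy
    from config import HARPYIA_MODEL, HARPYIA_LANGUAGE, HARPYIA_PROMPT, DEVICE


    class WhisperStrategy(RecognizerStrategy):
        def __init__(self):
            # load the model once; reloading per file would dominate the runtime
            self._model = whisper.load_model(HARPYIA_MODEL, device=DEVICE)

        def recognize(self, file) -> str:
            result = self._model.transcribe(
                file, language=HARPYIA_LANGUAGE, initial_prompt=HARPYIA_PROMPT)
            return result['text']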