Implemented new architecture

Created a message service responsible for searching for prompt words in the recognized text and sending the result to the client.
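
For illustration, a minimal sketch of how these pieces compose (assuming src/ is on PYTHONPATH; the prompt string, URL and recognized text below are placeholders, the default prompt comes from src/config.py):

from message import MessageComposer, PromptService
from message.message_sender import MessageSender, RatStrategy

prompt_service = PromptService('спасите помогите на помощь пожар')
sender = MessageSender(RatStrategy('http://rat.example:5001'))   # placeholder URL
composer = MessageComposer(prompt_service, sender)

message = composer.compose('помогите пожар на складе')           # placeholder recognized text
if message.has_prompt():   # at least one recognized word is a prompt word
    message.send()         # POSTs the filtered words to <url>/message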

Created a recognizer with two strategies: whisper and Dany's fast whisper.
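
Both strategies sit behind a single Recognizer facade, so the backend can be swapped at construction time. A minimal sketch (the file path is a placeholder; both strategy classes are still stubs in this commit):

from recognizer import Recognizer, WhisperStrategy, FastWhisperStrategy

recognizer = Recognizer(WhisperStrategy())        # plain openai-whisper backend
# recognizer = Recognizer(FastWhisperStrategy())  # faster-whisper backend
recognized_text = recognizer.recognize('/tmp/sample.wav')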

Implemented a file stack that runs in a separate thread, sends each file to the recognizer, and then sends the resulting message to the client (Rat, for example).
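
A hedged sketch of wiring the stack together, reusing the recognizer and composer from the sketches above (the file path is a placeholder):

from file_stack import WavStack

stack = WavStack(recognizer, composer)
stack.start_loop_in_thread()        # consume queued files in a background thread
stack.append('/tmp/incoming.wav')   # e.g. called from the Flask upload handler

# on shutdown
stack.stop_loop()
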
Sviatoslav Tsariov Yurievich 2024-03-19 19:01:36 +03:00
parent cb5f504d31
commit e89122cb76
19 changed files with 228 additions and 14 deletions

Dockerfile

@@ -1,20 +1,23 @@
 FROM python:3.10-slim
+ENV FLASK_APP=src/app.py
+ARG PIP_REQ_FILE=requirements.txt
 WORKDIR /app
-COPY requirements.txt /app
-RUN apt-get update && apt-get install git -y
-RUN pip3 install -r requirements.txt
-RUN pip3 install "git+https://github.com/openai/whisper.git"
-RUN apt-get install -y ffmpeg
-RUN whisper --model medium --language ru dummy.wav; exit 0
-RUN whisper --model small --language ru dummy.wav; exit 0
-COPY . .
+RUN apt update && apt install git ffmpeg -y && \
+    pip3 install "git+https://github.com/openai/whisper.git"
+RUN whisper --model medium --language ru dummy.wav; \
+    whisper --model small --language ru dummy.wav; \
+    exit 0
+COPY src/ src/
+# Separate requirements installation to keep other dependencies
+# in cache
+COPY ${PIP_REQ_FILE} ${PIP_REQ_FILE}
+RUN pip3 install -r ${PIP_REQ_FILE}
 EXPOSE 5000
-ENV FLASK_APP=src/app.py
 CMD [ "python3", "-m" , "flask", "run", "--host=0.0.0.0"]

Makefile Normal file

@@ -0,0 +1,7 @@
# Note: PYTORCH_CUDA_ALLOC_CONF is assigned twice below; the shell passes only
# the last value (max_split_size_mb:128) to the flask process.
run:
	PYTORCH_NO_CUDA_MEMORY_CACHING=1 \
	PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True \
	CUDA_LAUNCH_BLOCKING=1 \
	FLASK_APP=src/app.py \
	PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128 \
	flask run --host=0.0.0.0

requirements.txt

@@ -1,8 +1,62 @@
-flask==3.0.2
-Jinja2==3.1.3
+asgiref==3.7.2
+av==11.0.0
 blinker==1.7.0
-Werkzeug==3.0.1
+certifi==2024.2.2
+charset-normalizer==3.3.2
 click==8.1.7
+coloredlogs==15.0.1
+ctranslate2==4.0.0
+Cython==3.0.8
+dtw-python==1.3.1
+faster-whisper==1.0.0
+filelock==3.13.1
+Flask==3.0.2
+flatbuffers==23.5.26
+fsspec==2024.2.0
+huggingface-hub==0.21.3
+humanfriendly==10.0
+idna==3.6
 itsdangerous==2.1.2
+Jinja2==3.1.3
+llvmlite==0.42.0
 MarkupSafe==2.1.5
+more-itertools==10.2.0
+mpmath==1.3.0
+networkx==3.2.1
+numba==0.59.0
+numpy==1.26.4
+nvidia-cublas-cu12==12.1.3.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-cuda-nvrtc-cu12==12.1.105
+nvidia-cuda-runtime-cu12==12.1.105
+nvidia-cudnn-cu12==8.9.2.26
+nvidia-cufft-cu12==11.0.2.54
+nvidia-curand-cu12==10.3.2.106
+nvidia-cusolver-cu12==11.4.5.107
+nvidia-cusparse-cu12==12.1.0.106
+nvidia-nccl-cu12==2.19.3
+nvidia-nvjitlink-cu12==12.3.101
+nvidia-nvtx-cu12==12.1.105
+onnxruntime==1.17.1
+openai-whisper @ git+https://github.com/openai/whisper.git@ba3f3cd54b0e5b8ce1ab3de13e32122d0d5f98ab
+packaging==23.2
+pillow==10.2.0
+protobuf==4.25.3
 python-dotenv==1.0.1
+PyYAML==6.0.1
+regex==2023.12.25
+requests==2.31.0
+scipy==1.12.0
+six==1.16.0
+sympy==1.12
+tiktoken==0.6.0
+tokenizers==0.15.2
+torch==2.2.1
+torchaudio==2.2.1
+torchvision==0.17.1
+tqdm==4.66.2
+triton==2.2.0
+typing_extensions==4.10.0
+urllib3==2.2.1
+Werkzeug==3.0.1
+whisper-timestamped==1.15.0

src/app.py

@@ -32,6 +32,7 @@ def recognize_files(handler_fn):
     results = []
     for filename, handle in request.files.items():
+        #
         temp = NamedTemporaryFile()
         handle.save(temp)
         result = model.transcribe(temp.name, language=HARPYIA_LANGUAGE, initial_prompt=HARPYIA_PROMPT)

src/config.py Normal file

@@ -0,0 +1,12 @@
import os
import torch
from dotenv import load_dotenv

load_dotenv()

HARPYIA_PROMPT = os.getenv('HARPYIA_PROMPT') or 'спасите помогите на помощь пожар'
HARPYIA_MODEL = os.getenv('HARPYIA_MODEL') or 'medium'
HARPYIA_LANGUAGE = os.getenv('HARPYIA_LANGUAGE') or 'ru'

# Check if NVIDIA GPU is available
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

src/file_stack.py Normal file

@@ -0,0 +1,32 @@
from threading import Thread
from time import sleep

from recognizer import Recognizer
from message import MessageComposer


class WavStack:
    def __init__(self, recognizer: Recognizer, message_composer: MessageComposer):
        self._stack = []
        self._recognizer = recognizer
        self._message_composer = message_composer
        self._running = False

    def append(self, file):
        self._stack.append(file)

    def loop(self):
        self._running = True
        while self._running:
            if self._stack:
                file = self._stack.pop(0)
                recognized_text = self._recognizer.recognize(file)
                message = self._message_composer.compose(recognized_text)
                if message.has_prompt():
                    message.send()
            else:
                # avoid a busy-wait while no files are queued
                sleep(0.1)

    def start_loop_in_thread(self):
        thread = Thread(target=self.loop)
        thread.start()

    def stop_loop(self):
        self._running = False

src/message/__init__.py Normal file

@@ -0,0 +1,6 @@
from message.prompt_service import PromptService
from message.message_sender.message_sender import MessageSender
# Message is imported before MessageComposer to avoid a circular import:
# MessageComposer imports Message back from this package.
from message.message import Message
from message.message_composer import MessageComposer

import message.message_sender as message_sender

src/message/message.py Normal file

@@ -0,0 +1,17 @@
from message import PromptService, MessageSender


class Message:
    def __init__(self, prompt_service: PromptService, message_sender: MessageSender,
                 recognized_text: str):
        self._prompt_service = prompt_service
        self._message_sender = message_sender
        self._recognized_text = recognized_text

    def has_prompt(self) -> bool:
        return self._prompt_service.has_prompt(self._recognized_text)

    def send(self):
        self._message_sender.send(self._generate_response())

    def _generate_response(self) -> str:
        return self._prompt_service.filter_words_with_prompt(self._recognized_text)

src/message/message_composer.py Normal file

@@ -0,0 +1,9 @@
from message import Message, PromptService, MessageSender


class MessageComposer:
    def __init__(self, prompt_service: PromptService, message_sender: MessageSender):
        self._prompt_service = prompt_service
        self._message_sender = message_sender

    def compose(self, recognized_text) -> Message:
        return Message(self._prompt_service, self._message_sender, recognized_text)

src/message/message_sender/__init__.py Normal file

@@ -0,0 +1,3 @@
# The strategy base class is imported first: message_sender and rat_strategy
# import it back from this package.
from message.message_sender.message_sender_strategy import MessageSenderStrategy
from message.message_sender.message_sender import MessageSender
from message.message_sender.rat_strategy import RatStrategy

src/message/message_sender/message_sender.py Normal file

@@ -0,0 +1,8 @@
from message.message_sender import MessageSenderStrategy


class MessageSender:
    def __init__(self, strategy: MessageSenderStrategy):
        self._strategy = strategy

    def send(self, message):
        self._strategy.send(message)

src/message/message_sender/message_sender_strategy.py Normal file

@@ -0,0 +1,6 @@
from abc import ABC, abstractmethod


class MessageSenderStrategy(ABC):
    @abstractmethod
    def send(self, message):
        pass

src/message/message_sender/rat_strategy.py Normal file

@@ -0,0 +1,12 @@
import requests

from message.message_sender import MessageSenderStrategy

MESSAGE_ENDPOINT = '/message'


class RatStrategy(MessageSenderStrategy):
    def __init__(self, url):
        self._url = url

    def send(self, message):
        requests.post(self._url + MESSAGE_ENDPOINT, json={'message': message})

src/message/prompt_service.py Normal file

@@ -0,0 +1,16 @@
class PromptService:
    def __init__(self, prompt):
        self._prompt = prompt

    def has_prompt(self, text: str) -> bool:
        # True if any recognized word matches one of the prompt words
        prompt_words = self._prompt.split(' ')
        return any(part in prompt_words for part in text.split(' '))

    def filter_words_with_prompt(self, text: str) -> str:
        # Keep only the recognized words that are prompt words
        prompt_words = self._prompt.split(' ')
        words = [part for part in text.split(' ') if part in prompt_words]
        return ' '.join(words)

src/recognizer/__init__.py Normal file

@@ -0,0 +1,4 @@
# RecognizerStrategy is imported first: recognizer and the strategy modules
# import it back from this package.
from recognizer.recognizer_strategy import RecognizerStrategy
from recognizer.recognizer import Recognizer
from recognizer.whisper_strategy import WhisperStrategy
from recognizer.fast_whisper_strategy import FastWhisperStrategy

src/recognizer/fast_whisper_strategy.py Normal file

@@ -0,0 +1,5 @@
from recognizer import RecognizerStrategy


class FastWhisperStrategy(RecognizerStrategy):
    def recognize(self, file) -> str:
        # Placeholder: the faster-whisper backend is not implemented yet
        return ''
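
The faster-whisper strategy is left as a stub in this commit. A minimal sketch of one possible implementation, assuming the faster-whisper package from requirements.txt; the model size comes from src/config.py, while the compute_type choice is an assumption of this sketch:

from faster_whisper import WhisperModel

from config import HARPYIA_MODEL, HARPYIA_LANGUAGE, HARPYIA_PROMPT, DEVICE
from recognizer import RecognizerStrategy


class FastWhisperStrategy(RecognizerStrategy):
    def __init__(self):
        # compute_type is an assumption; float16 requires a CUDA device
        compute_type = 'float16' if DEVICE == 'cuda' else 'int8'
        self._model = WhisperModel(HARPYIA_MODEL, device=DEVICE, compute_type=compute_type)

    def recognize(self, file) -> str:
        segments, _info = self._model.transcribe(
            file, language=HARPYIA_LANGUAGE, initial_prompt=HARPYIA_PROMPT)
        # faster-whisper yields segments lazily; join their text into one string
        return ' '.join(segment.text.strip() for segment in segments)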

src/recognizer/recognizer.py Normal file

@@ -0,0 +1,8 @@
from recognizer import RecognizerStrategy


class Recognizer:
    def __init__(self, strategy: RecognizerStrategy):
        self._strategy = strategy

    def recognize(self, file) -> str:
        # Delegate to the configured strategy and return its transcription
        return self._strategy.recognize(file)

src/recognizer/recognizer_strategy.py Normal file

@@ -0,0 +1,6 @@
from abc import ABC, abstractmethod


class RecognizerStrategy(ABC):
    @abstractmethod
    def recognize(self, file) -> str:
        pass

src/recognizer/whisper_strategy.py Normal file

@@ -0,0 +1,5 @@
from recognizer import RecognizerStrategy


class WhisperStrategy(RecognizerStrategy):
    def recognize(self, file) -> str:
        # Placeholder: the openai-whisper backend is not implemented yet
        return ''
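
Likewise, a minimal sketch of the plain whisper-backed strategy, reusing the same transcribe call as the existing Flask handler; loading the model inside the strategy is an assumption of this sketch:

import whisper

from config import HARPYIA_MODEL, HARPYIA_LANGUAGE, HARPYIA_PROMPT, DEVICE
from recognizer import RecognizerStrategy


class WhisperStrategy(RecognizerStrategy):
    def __init__(self):
        # Load the model once and keep it in memory for subsequent calls
        self._model = whisper.load_model(HARPYIA_MODEL, device=DEVICE)

    def recognize(self, file) -> str:
        result = self._model.transcribe(
            file, language=HARPYIA_LANGUAGE, initial_prompt=HARPYIA_PROMPT)
        return result['text']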