From e89122cb76c440afe95b6814c57badb16eb3a772 Mon Sep 17 00:00:00 2001 From: Sviatoslav Tsariov Date: Tue, 19 Mar 2024 19:01:36 +0300 Subject: [PATCH] Implemented new architecture Created message service responsible for searching the prompts inside the recognized text and sending it to the client. Created recognizer with two strategies: whisper and Dany's fast whisper. Implemented file stack which works in the separated thread, sends the file to the recognizer and after that sends the message to the client (Rat, for example). --- Dockerfile | 25 ++++---- Makefile | 7 +++ requirements.txt | 60 ++++++++++++++++++- src/app.py | 1 + src/config.py | 12 ++++ src/file_stack.py | 32 ++++++++++ src/message/__init__.py | 6 ++ src/message/message.py | 17 ++++++ src/message/message_composer.py | 9 +++ src/message/message_sender/__init__.py | 3 + src/message/message_sender/message_sender.py | 8 +++ .../message_sender/message_sender_strategy.py | 6 ++ src/message/message_sender/rat_strategy.py | 12 ++++ src/message/prompt_service.py | 16 +++++ src/recognizer/__init__.py | 4 ++ src/recognizer/fast_whisper_strategy.py | 5 ++ src/recognizer/recognizer.py | 8 +++ src/recognizer/recognizer_strategy.py | 6 ++ src/recognizer/whisper_strategy.py | 5 ++ 19 files changed, 228 insertions(+), 14 deletions(-) create mode 100644 Makefile create mode 100644 src/config.py create mode 100644 src/file_stack.py create mode 100644 src/message/__init__.py create mode 100644 src/message/message.py create mode 100644 src/message/message_composer.py create mode 100644 src/message/message_sender/__init__.py create mode 100644 src/message/message_sender/message_sender.py create mode 100644 src/message/message_sender/message_sender_strategy.py create mode 100644 src/message/message_sender/rat_strategy.py create mode 100644 src/message/prompt_service.py create mode 100644 src/recognizer/__init__.py create mode 100644 src/recognizer/fast_whisper_strategy.py create mode 100644 src/recognizer/recognizer.py 
FROM python:3.10-slim

ENV FLASK_APP=src/app.py
ARG PIP_REQ_FILE=requirements.txt

WORKDIR /app

# apt-get (not apt) is the stable CLI intended for scripted use.
RUN apt-get update && apt-get install git ffmpeg -y && \
    pip3 install "git+https://github.com/openai/whisper.git"

# Pre-download both models into the image. dummy.wav does not exist, so each
# call may fail after the model is fetched; `|| true` ignores that failure.
# The previous `; exit 0 && ...` form left the shell after the first command,
# so the `small` model was never downloaded.
RUN whisper --model medium --language ru dummy.wav || true; \
    whisper --model small --language ru dummy.wav || true

# Install the requirements in their own layer, before the sources are copied,
# so that neither a requirements change invalidates the whisper layers above
# nor a source change invalidates the installed dependencies.
COPY ${PIP_REQ_FILE} ${PIP_REQ_FILE}
RUN pip3 install -r ${PIP_REQ_FILE}

COPY src/ src/

EXPOSE 5000

CMD [ "python3", "-m" , "flask", "run", "--host=0.0.0.0"]
+faster-whisper==1.0.0 +filelock==3.13.1 +Flask==3.0.2 +flatbuffers==23.5.26 +fsspec==2024.2.0 +huggingface-hub==0.21.3 +humanfriendly==10.0 +idna==3.6 itsdangerous==2.1.2 +Jinja2==3.1.3 +llvmlite==0.42.0 MarkupSafe==2.1.5 +more-itertools==10.2.0 +mpmath==1.3.0 +networkx==3.2.1 +numba==0.59.0 +numpy==1.26.4 +nvidia-cublas-cu12==12.1.3.1 +nvidia-cuda-cupti-cu12==12.1.105 +nvidia-cuda-nvrtc-cu12==12.1.105 +nvidia-cuda-runtime-cu12==12.1.105 +nvidia-cudnn-cu12==8.9.2.26 +nvidia-cufft-cu12==11.0.2.54 +nvidia-curand-cu12==10.3.2.106 +nvidia-cusolver-cu12==11.4.5.107 +nvidia-cusparse-cu12==12.1.0.106 +nvidia-nccl-cu12==2.19.3 +nvidia-nvjitlink-cu12==12.3.101 +nvidia-nvtx-cu12==12.1.105 +onnxruntime==1.17.1 +openai-whisper @ git+https://github.com/openai/whisper.git@ba3f3cd54b0e5b8ce1ab3de13e32122d0d5f98ab +packaging==23.2 +pillow==10.2.0 +protobuf==4.25.3 python-dotenv==1.0.1 +PyYAML==6.0.1 +regex==2023.12.25 +requests==2.31.0 +scipy==1.12.0 +six==1.16.0 +sympy==1.12 +tiktoken==0.6.0 +tokenizers==0.15.2 +torch==2.2.1 +torchaudio==2.2.1 +torchvision==0.17.1 +tqdm==4.66.2 +triton==2.2.0 +typing_extensions==4.10.0 +urllib3==2.2.1 +Werkzeug==3.0.1 +whisper-timestamped==1.15.0 \ No newline at end of file diff --git a/src/app.py b/src/app.py index 25ac26a..f42943d 100644 --- a/src/app.py +++ b/src/app.py @@ -32,6 +32,7 @@ def recognize_files(handler_fn): results = [] for filename, handle in request.files.items(): + # temp = NamedTemporaryFile() handle.save(temp) result = model.transcribe(temp.name, language=HARPYIA_LANGUAGE, initial_prompt=HARPYIA_PROMPT) diff --git a/src/config.py b/src/config.py new file mode 100644 index 0000000..e016740 --- /dev/null +++ b/src/config.py @@ -0,0 +1,12 @@ +import os +import torch +from dotenv import load_dotenv + +load_dotenv() + +HARPYIA_PROMPT = os.getenv('HARPYIA_PROMPT') or 'спасите помогите на помощь пожар' +HARPYIA_MODEL = os.getenv('HARPYIA_MODEL') or 'medium' +HARPYIA_LANGUAGE = os.getenv('HARPYIA_LANGUAGE') or 'ru' + +# Check if 
class WavStack:
    """Queue of audio files handled one at a time by a worker loop.

    Despite the name, files are processed first-in, first-out. Each file is
    passed to the recognizer, the recognized text is turned into a message by
    the message composer, and the message is sent only when it reports that
    it contains a prompt.
    """

    # Seconds the loop waits for a new file before re-checking the stop flag.
    _POLL_INTERVAL = 0.1

    # Annotations are quoted so they are not evaluated when the class is defined.
    def __init__(self, recognizer: "Recognizer", message_composer: "MessageComposer"):
        """Create an empty queue.

        Args:
            recognizer: Object whose ``recognize(file)`` returns the text.
            message_composer: Object whose ``compose(text)`` returns a message
                exposing ``has_prompt()`` and ``send()``.
        """
        # Function-scope import keeps the class self-contained.
        from queue import Queue

        # A thread-safe FIFO: blocking reads replace the former busy-wait and
        # removing the oldest item no longer shifts a whole list.
        self._stack = Queue()
        self._recognizer = recognizer
        self._message_composer = message_composer
        self._running = False

    def append(self, file):
        """Schedule *file* for recognition."""
        self._stack.put(file)

    def loop(self):
        """Process queued files in the calling thread until ``stop_loop()``."""
        self._running = True
        self._run()

    def start_loop_in_thread(self):
        """Start the processing loop in a new thread and return that thread.

        The running flag is raised here, before the thread starts, so a
        ``stop_loop()`` issued right after this call cannot be overwritten by
        the worker starting up late.
        """
        self._running = True
        thread = Thread(target=self._run)
        thread.start()
        return thread

    def stop_loop(self):
        """Ask the loop to finish; it exits after the file in progress, if any."""
        self._running = False

    def _run(self):
        """Take files off the queue and process them while the flag is set."""
        from queue import Empty

        while self._running:
            try:
                # Block instead of spinning; the timeout bounds how long a
                # stop request can go unnoticed while the queue is empty.
                file = self._stack.get(timeout=self._POLL_INTERVAL)
            except Empty:
                continue
            self._process(file)

    def _process(self, file):
        """Recognize one file and send the resulting message if it has a prompt."""
        import logging

        try:
            recognized_text = self._recognizer.recognize(file)
            message = self._message_composer.compose(recognized_text)

            if message.has_prompt():
                message.send()
        except Exception:
            # Worker boundary: one bad file or failed delivery must not stop
            # the processing of every later file.
            logging.getLogger(__name__).exception("Failed to process %r", file)
class MessageComposer:
    """Factory that binds recognized text to the shared message collaborators."""

    def __init__(self, prompt_service: PromptService, message_sender: MessageSender):
        """Remember the prompt service and the sender given to every message."""
        self._prompt_service, self._message_sender = prompt_service, message_sender

    def compose(self, recognized_text) -> Message:
        """Build a ``Message`` for *recognized_text* using the stored collaborators."""
        composed = Message(
            self._prompt_service,
            self._message_sender,
            recognized_text,
        )
        return composed
class PromptService:
    """Find the configured prompt words inside recognized text.

    The prompt is a whitespace-separated list of trigger words. Text is split
    on whitespace and compared word by word using exact, case-sensitive
    equality.
    """

    def __init__(self, prompt):
        """Store the prompt.

        Args:
            prompt: Whitespace-separated trigger words.
        """
        self._prompt = prompt

    def _prompt_words(self) -> set:
        """Return the prompt words as a set, built once per query."""
        return set(self._prompt.split())

    def has_prompt(self, text: str) -> bool:
        """Return True if at least one word of *text* is a prompt word.

        Every word is examined; the previous implementation returned from
        inside the first loop iteration and so only looked at the first word.
        Empty or whitespace-only text yields False.
        """
        prompt_words = self._prompt_words()
        return any(word in prompt_words for word in text.split())

    def filter_words_with_prompt(self, text: str) -> list[str]:
        """Return the words of *text* that are prompt words, in text order.

        The result is a list, as it always was at runtime; the annotation
        used to claim ``str``.
        """
        prompt_words = self._prompt_words()
        return [word for word in text.split() if word in prompt_words]
class Recognizer:
    """Front end that delegates speech recognition to a pluggable strategy."""

    # The annotation is quoted so it is not evaluated when the class is defined.
    def __init__(self, strategy: "RecognizerStrategy"):
        """Store the strategy that performs the actual recognition.

        Args:
            strategy: Object providing ``recognize(file)``.
        """
        self._strategy = strategy

    def recognize(self, file) -> str:
        """Return the strategy's result for *file*.

        The result was previously computed and then dropped, so callers
        always received ``None``.
        """
        return self._strategy.recognize(file)