diff --git a/Dockerfile b/Dockerfile index 3b3403f..b98f321 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,20 +1,23 @@ FROM python:3.10-slim +ENV FLASK_APP=src/app.py +ARG PIP_REQ_FILE=requirements.txt + WORKDIR /app -COPY requirements.txt /app -RUN apt-get update && apt-get install git -y -RUN pip3 install -r requirements.txt -RUN pip3 install "git+https://github.com/openai/whisper.git" -RUN apt-get install -y ffmpeg +RUN apt update && apt install git ffmpeg -y && \ + pip3 install "git+https://github.com/openai/whisper.git" -RUN whisper --model medium --language ru dummy.wav; exit 0 -RUN whisper --model small --language ru dummy.wav; exit 0 +RUN whisper --model medium --language ru dummy.wav; \ + whisper --model small --language ru dummy.wav; exit 0 -COPY . . +# Separate requirements installation to keep other dependencies +# in cache +COPY ${PIP_REQ_FILE} ${PIP_REQ_FILE} +RUN pip3 install -r ${PIP_REQ_FILE} + +COPY src/ src/ EXPOSE 5000 -ENV FLASK_APP=src/app.py - -CMD [ "python3", "-m" , "flask", "run", "--host=0.0.0.0"] +CMD [ "python3", "-m" , "flask", "run", "--host=0.0.0.0"] \ No newline at end of file diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..622219d --- /dev/null +++ b/Makefile @@ -0,0 +1,7 @@ +run: + PYTORCH_NO_CUDA_MEMORY_CACHING=1 \ + PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True \ + CUDA_LAUNCH_BLOCKING=1 \ + FLASK_APP=src/app.py \ + PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128 \ + flask run --host=0.0.0.0 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index a3e0ffb..46071bb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,62 @@ -flask==3.0.2 -Jinja2==3.1.3 +asgiref==3.7.2 +av==11.0.0 blinker==1.7.0 -Werkzeug==3.0.1 +certifi==2024.2.2 +charset-normalizer==3.3.2 click==8.1.7 +coloredlogs==15.0.1 +ctranslate2==4.0.0 +Cython==3.0.8 +dtw-python==1.3.1 +faster-whisper==1.0.0 +filelock==3.13.1 +Flask==3.0.2 +flatbuffers==23.5.26 +fsspec==2024.2.0 +huggingface-hub==0.21.3 
+humanfriendly==10.0 +idna==3.6 itsdangerous==2.1.2 +Jinja2==3.1.3 +llvmlite==0.42.0 MarkupSafe==2.1.5 +more-itertools==10.2.0 +mpmath==1.3.0 +networkx==3.2.1 +numba==0.59.0 +numpy==1.26.4 +nvidia-cublas-cu12==12.1.3.1 +nvidia-cuda-cupti-cu12==12.1.105 +nvidia-cuda-nvrtc-cu12==12.1.105 +nvidia-cuda-runtime-cu12==12.1.105 +nvidia-cudnn-cu12==8.9.2.26 +nvidia-cufft-cu12==11.0.2.54 +nvidia-curand-cu12==10.3.2.106 +nvidia-cusolver-cu12==11.4.5.107 +nvidia-cusparse-cu12==12.1.0.106 +nvidia-nccl-cu12==2.19.3 +nvidia-nvjitlink-cu12==12.3.101 +nvidia-nvtx-cu12==12.1.105 +onnxruntime==1.17.1 +openai-whisper @ git+https://github.com/openai/whisper.git@ba3f3cd54b0e5b8ce1ab3de13e32122d0d5f98ab +packaging==23.2 +pillow==10.2.0 +protobuf==4.25.3 python-dotenv==1.0.1 +PyYAML==6.0.1 +regex==2023.12.25 +requests==2.31.0 +scipy==1.12.0 +six==1.16.0 +sympy==1.12 +tiktoken==0.6.0 +tokenizers==0.15.2 +torch==2.2.1 +torchaudio==2.2.1 +torchvision==0.17.1 +tqdm==4.66.2 +triton==2.2.0 +typing_extensions==4.10.0 +urllib3==2.2.1 +Werkzeug==3.0.1 +whisper-timestamped==1.15.0 \ No newline at end of file diff --git a/src/app.py b/src/app.py index 25ac26a..f42943d 100644 --- a/src/app.py +++ b/src/app.py @@ -32,6 +32,7 @@ def recognize_files(handler_fn): results = [] for filename, handle in request.files.items(): + # temp = NamedTemporaryFile() handle.save(temp) result = model.transcribe(temp.name, language=HARPYIA_LANGUAGE, initial_prompt=HARPYIA_PROMPT) diff --git a/src/config.py b/src/config.py new file mode 100644 index 0000000..e016740 --- /dev/null +++ b/src/config.py @@ -0,0 +1,12 @@ +import os +import torch +from dotenv import load_dotenv + +load_dotenv() + +HARPYIA_PROMPT = os.getenv('HARPYIA_PROMPT') or 'спасите помогите на помощь пожар' # `or` applies the default to an empty value as well as an unset one +HARPYIA_MODEL = os.getenv('HARPYIA_MODEL') or 'medium' +HARPYIA_LANGUAGE = os.getenv('HARPYIA_LANGUAGE') or 'ru' + +# Use CUDA when torch reports it as available; otherwise run on the CPU +DEVICE = "cuda" if torch.cuda.is_available() else "cpu" \ No newline at end of file diff --git 
a/src/file_stack.py b/src/file_stack.py new file mode 100644 index 0000000..28b223f --- /dev/null +++ b/src/file_stack.py @@ -0,0 +1,42 @@ +from queue import Empty, Queue +from threading import Thread + +from recognizer import Recognizer +from message import MessageComposer + +class WavStack: + """Queue of files that are recognized and reported one at a time, oldest first.""" + + def __init__(self, recognizer: Recognizer, message_composer: MessageComposer): + self._stack = Queue() + self._recognizer = recognizer + self._message_composer = message_composer + self._running = False + + def append(self, file): + """Enqueue file for recognition.""" + self._stack.put(file) + + def loop(self): + """Process queued files until stop_loop() is called, blocking while the queue is empty.""" + self._running = True + while self._running: + try: + file = self._stack.get(timeout=0.5) # wake periodically to re-check the stop flag + except Empty: + continue + + recognized_text = self._recognizer.recognize(file) + message = self._message_composer.compose(recognized_text) + + if message.has_prompt(): + message.send() + + def start_loop_in_thread(self): + """Run loop() in a new thread.""" + thread = Thread(target=self.loop) + thread.start() + + def stop_loop(self): + """Ask loop() to exit once its current iteration finishes.""" + self._running = False \ No newline at end of file diff --git a/src/message/__init__.py b/src/message/__init__.py new file mode 100644 index 0000000..52ede6f --- /dev/null +++ b/src/message/__init__.py @@ -0,0 +1,6 @@ +from message.prompt_service import PromptService +from message.message_sender.message_sender import MessageSender +from message.message_composer import MessageComposer +from message.message import Message + +import message.message_sender as message_sender diff --git a/src/message/message.py b/src/message/message.py new file mode 100644 index 0000000..d67274c --- /dev/null +++ b/src/message/message.py @@ -0,0 +1,17 @@ +from message import PromptService, MessageSender + +class Message: + def __init__(self, prompt_service: PromptService, message_sender: MessageSender, \ + recognized_text: str): + self._prompt_service = prompt_service + self._message_sender = message_sender + self._recognized_text = recognized_text + + def has_prompt(self) -> bool: + return self._prompt_service.has_prompt(self._recognized_text) + + def send(self): + 
self._message_sender.send(self._generate_response()) + + def _generate_response(self) -> list[str]: + return self._prompt_service.filter_words_with_prompt(self._recognized_text) \ No newline at end of file diff --git a/src/message/message_composer.py b/src/message/message_composer.py new file mode 100644 index 0000000..8dca533 --- /dev/null +++ b/src/message/message_composer.py @@ -0,0 +1,14 @@ +from message.message import Message +from message.message_sender.message_sender import MessageSender +from message.prompt_service import PromptService + +class MessageComposer: + """Build Message objects that share one prompt service and one sender.""" + + def __init__(self, prompt_service: PromptService, message_sender: MessageSender): + self._prompt_service = prompt_service + self._message_sender = message_sender + + def compose(self, recognized_text) -> Message: + """Wrap recognized_text in a Message bound to the stored collaborators.""" + return Message(self._prompt_service, self._message_sender, recognized_text) diff --git a/src/message/message_sender/__init__.py b/src/message/message_sender/__init__.py new file mode 100644 index 0000000..34dfeb2 --- /dev/null +++ b/src/message/message_sender/__init__.py @@ -0,0 +1,3 @@ +from message.message_sender.message_sender import MessageSender +from message.message_sender.message_sender_strategy import MessageSenderStrategy +from message.message_sender.rat_strategy import RatStrategy diff --git a/src/message/message_sender/message_sender.py b/src/message/message_sender/message_sender.py new file mode 100644 index 0000000..d16e33b --- /dev/null +++ b/src/message/message_sender/message_sender.py @@ -0,0 +1,11 @@ +from message.message_sender.message_sender_strategy import MessageSenderStrategy + +class MessageSender: + """Send messages through an interchangeable delivery strategy.""" + + def __init__(self, strategy: MessageSenderStrategy): + self._strategy = strategy + + def send(self, message): + """Hand message to the configured strategy.""" + self._strategy.send(message) diff --git a/src/message/message_sender/message_sender_strategy.py b/src/message/message_sender/message_sender_strategy.py new file mode 100644 index 0000000..1fc618a --- /dev/null +++ b/src/message/message_sender/message_sender_strategy.py @@ -0,0 +1,6 @@ +from abc import ABC, abstractmethod + +class MessageSenderStrategy(ABC): + @abstractmethod + def 
send(self, message): + pass diff --git a/src/message/message_sender/rat_strategy.py b/src/message/message_sender/rat_strategy.py new file mode 100644 index 0000000..3954229 --- /dev/null +++ b/src/message/message_sender/rat_strategy.py @@ -0,0 +1,12 @@ +import requests + +from message.message_sender import MessageSenderStrategy + +MESSAGE_ENDPOINT = '/message' + +class RatStrategy(MessageSenderStrategy): + def __init__(self, url): + self._url = url + + def send(self, message): + requests.post(self._url + MESSAGE_ENDPOINT, json={'message': message}) diff --git a/src/message/prompt_service.py b/src/message/prompt_service.py new file mode 100644 index 0000000..b443e5b --- /dev/null +++ b/src/message/prompt_service.py @@ -0,0 +1,15 @@ +class PromptService: + """Match recognized text against a space-separated list of prompt words.""" + + def __init__(self, prompt): + self._prompt = prompt + + def has_prompt(self, text: str) -> bool: + """Return True if any space-separated word of text is one of the prompt words.""" + prompt_words = set(self._prompt.split(' ')) + return any(part in prompt_words for part in text.split(' ')) + + def filter_words_with_prompt(self, text: str) -> list[str]: + """Return the words of text that are prompt words, in their original order.""" + prompt_words = set(self._prompt.split(' ')) + return [part for part in text.split(' ') if part in prompt_words] diff --git a/src/recognizer/__init__.py b/src/recognizer/__init__.py new file mode 100644 index 0000000..f892ae5 --- /dev/null +++ b/src/recognizer/__init__.py @@ -0,0 +1,4 @@ +from recognizer.recognizer import Recognizer +from recognizer.recognizer_strategy import RecognizerStrategy +from recognizer.whisper_strategy import WhisperStrategy +from recognizer.fast_whisper_strategy import FastWhisperStrategy diff --git a/src/recognizer/fast_whisper_strategy.py b/src/recognizer/fast_whisper_strategy.py new file mode 100644 index 0000000..d93680d --- /dev/null +++ b/src/recognizer/fast_whisper_strategy.py @@ -0,0 +1,5 @@ +from recognizer import RecognizerStrategy + +class FastWhisperStrategy(RecognizerStrategy): + def recognize(self, file) -> str: + return '' \ No newline at end of file diff --git a/src/recognizer/recognizer.py b/src/recognizer/recognizer.py new file mode 100644 
index 0000000..cb7d704 --- /dev/null +++ b/src/recognizer/recognizer.py @@ -0,0 +1,11 @@ +from recognizer.recognizer_strategy import RecognizerStrategy + +class Recognizer: + """Delegate speech recognition to an interchangeable strategy.""" + + def __init__(self, strategy: RecognizerStrategy): + self._strategy = strategy + + def recognize(self, file) -> str: + """Return whatever the configured strategy recognizes in file.""" + return self._strategy.recognize(file) diff --git a/src/recognizer/recognizer_strategy.py b/src/recognizer/recognizer_strategy.py new file mode 100644 index 0000000..16b718a --- /dev/null +++ b/src/recognizer/recognizer_strategy.py @@ -0,0 +1,6 @@ +from abc import ABC, abstractmethod + +class RecognizerStrategy(ABC): + @abstractmethod + def recognize(self, file) -> str: + pass diff --git a/src/recognizer/whisper_strategy.py b/src/recognizer/whisper_strategy.py new file mode 100644 index 0000000..2fcf3af --- /dev/null +++ b/src/recognizer/whisper_strategy.py @@ -0,0 +1,5 @@ +from recognizer import RecognizerStrategy + +class WhisperStrategy(RecognizerStrategy): + def recognize(self, file) -> str: + return '' \ No newline at end of file