From e4fee65c38f69a05f8f6616b563b730bd7a6935a Mon Sep 17 00:00:00 2001
From: weissmall <dany.small.work@gmail.com>
Date: Fri, 1 Mar 2024 13:57:11 +0300
Subject: [PATCH] Works in 700ms

---
 .dockerignore     |   5 +++
 Dockerfile        |  23 +++++-----
 Makefile          |   7 +++
 requirements.txt  |  60 +++++++++++++++++++++++--
 src/app.py        | 112 ++++++++++++++++++++++++++++++++++++----------
 src/test_utils.py |  39 ++++++++++++++++
 6 files changed, 209 insertions(+), 37 deletions(-)
 create mode 100644 Makefile
 create mode 100644 src/test_utils.py

diff --git a/.dockerignore b/.dockerignore
index 763bdb6..ed29347 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -140,3 +140,8 @@ ENV/
 
 # mypy
 .mypy_cache/
+
+
+# Audio files
+**/*.wav
+**/*.mp3
diff --git a/Dockerfile b/Dockerfile
index 3b3403f..51c7c0b 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,20 +1,24 @@
 FROM python:3.10-slim
 
+ENV FLASK_APP=src/app.py
+ARG PIP_REQ_FILE=requirements.txt
+
 WORKDIR /app
 
-COPY requirements.txt /app
-RUN apt-get update && apt-get install git -y
-RUN pip3 install -r requirements.txt
-RUN pip3 install "git+https://github.com/openai/whisper.git" 
-RUN apt-get install -y ffmpeg
+RUN apt-get update && apt-get install -y git ffmpeg && \
+    pip3 install "git+https://github.com/openai/whisper.git"
 
-RUN whisper --model medium --language ru dummy.wav; exit 0
-RUN whisper --model small --language ru dummy.wav; exit 0
+# Presumably pre-downloads both models; a failing run is ignored via the single trailing "exit 0"
+RUN whisper --model medium --language ru dummy.wav; \
+    whisper --model small --language ru dummy.wav; exit 0
 
-COPY . .
+# Install requirements before copying the sources so that code-only
+# changes reuse the cached dependency layer
+COPY ${PIP_REQ_FILE} ${PIP_REQ_FILE}
+RUN pip3 install -r ${PIP_REQ_FILE}
+
+COPY src/ src/
 
 EXPOSE 5000
 
-ENV FLASK_APP=src/app.py
-
 CMD [ "python3", "-m" , "flask", "run", "--host=0.0.0.0"]
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..8adfcdb
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,7 @@
+.PHONY: run
+run:
+	PYTORCH_NO_CUDA_MEMORY_CACHING=1 \
+	PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True,max_split_size_mb:128 \
+	CUDA_LAUNCH_BLOCKING=1 \
+	FLASK_APP=src/app.py \
+	flask run --host=0.0.0.0
diff --git a/requirements.txt b/requirements.txt
index a3e0ffb..b147807 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,8 +1,62 @@
-flask==3.0.2
-Jinja2==3.1.3
+asgiref==3.7.2
+av==11.0.0
 blinker==1.7.0
-Werkzeug==3.0.1
+certifi==2024.2.2
+charset-normalizer==3.3.2
 click==8.1.7
+coloredlogs==15.0.1
+ctranslate2==4.0.0
+Cython==3.0.8
+dtw-python==1.3.1
+faster-whisper==1.0.0
+filelock==3.13.1
+Flask==3.0.2
+flatbuffers==23.5.26
+fsspec==2024.2.0
+huggingface-hub==0.21.3
+humanfriendly==10.0
+idna==3.6
 itsdangerous==2.1.2
+Jinja2==3.1.3
+llvmlite==0.42.0
 MarkupSafe==2.1.5
+more-itertools==10.2.0
+mpmath==1.3.0
+networkx==3.2.1
+numba==0.59.0
+numpy==1.26.4
+nvidia-cublas-cu12==12.1.3.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-cuda-nvrtc-cu12==12.1.105
+nvidia-cuda-runtime-cu12==12.1.105
+nvidia-cudnn-cu12==8.9.2.26
+nvidia-cufft-cu12==11.0.2.54
+nvidia-curand-cu12==10.3.2.106
+nvidia-cusolver-cu12==11.4.5.107
+nvidia-cusparse-cu12==12.1.0.106
+nvidia-nccl-cu12==2.19.3
+nvidia-nvjitlink-cu12==12.3.101
+nvidia-nvtx-cu12==12.1.105
+onnxruntime==1.17.1
+openai-whisper @ git+https://github.com/openai/whisper.git@ba3f3cd54b0e5b8ce1ab3de13e32122d0d5f98ab
+packaging==23.2
+pillow==10.2.0
+protobuf==4.25.3
 python-dotenv==1.0.1
+PyYAML==6.0.1
+regex==2023.12.25
+requests==2.31.0
+scipy==1.12.0
+six==1.16.0
+sympy==1.12
+tiktoken==0.6.0
+tokenizers==0.15.2
+torch==2.2.1
+torchaudio==2.2.1
+torchvision==0.17.1
+tqdm==4.66.2
+triton==2.2.0
+typing_extensions==4.10.0
+urllib3==2.2.1
+Werkzeug==3.0.1
+whisper-timestamped==1.15.0
diff --git a/src/app.py b/src/app.py
index 25ac26a..3e1a856 100644
--- a/src/app.py
+++ b/src/app.py
@@ -1,67 +1,131 @@
+from ctranslate2.extensions import asyncio
 from flask import Flask, abort, request
 from tempfile import NamedTemporaryFile
 from dotenv import load_dotenv
 import os
+from torch.functional import Tensor
 import whisper
 import torch
 import sys
 import re
+from faster_whisper import WhisperModel
+from test_utils import elapsed_time
+from whisper_timestamped import transcribe_timestamped
+from multiprocessing import Process
 
 load_dotenv()
-
-HARPYIA_PROMPT = os.getenv('HARPYIA_PROMPT') or 'спасите помогите на помощь пожар'
-HARPYIA_MODEL = os.getenv('HARPYIA_MODEL') or 'medium'
-HARPYIA_LANGUAGE = os.getenv('HARPYIA_LANGUAGE') or 'ru'
+HARPYIA_PROMPT = os.getenv("HARPYIA_PROMPT") or "спасите помогите на помощь пожар"
+# Model size handed to WhisperModel; "small" used to be hard-coded and HARPYIA_MODEL ignored.
+# tiny.en, tiny, base.en, base, small.en, small, medium.en, medium, large-v1, large-v2, large-v3, large, distil-large-v2, distil-medium.en, distil-small.en
+HARPYIA_MODEL = os.getenv("HARPYIA_MODEL") or "small"
+model_size = HARPYIA_MODEL  # alias kept for the name this module already exposed
+HARPYIA_LANGUAGE = os.getenv("HARPYIA_LANGUAGE") or "ru"
 
-# Check if NVIDIA GPU is available
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+# CPU stays the default (the CUDA auto-detection result was always overwritten);
+# set HARPYIA_DEVICE=cuda to opt in to the GPU.
+DEVICE = os.getenv("HARPYIA_DEVICE") or "cpu"
 
 # Load the Whisper model:
-model = whisper.load_model(HARPYIA_MODEL, device=DEVICE)
+model = WhisperModel(
+    model_size,
+    device=DEVICE,
+    num_workers=6,
+    cpu_threads=10,
+)
 
 app = Flask(__name__)
 
+
 @app.route("/")
 def hello():
     return "To recognize an audio file, upload it using a POST request with '/recognize' or '/recognize_number' route."
 
+
 def recognize_files(handler_fn):
+    """Transcribe every uploaded file and return ``{"results": [...]}``.
+
+    ``handler_fn`` post-processes each transcript; aborts with 400 if no files were sent.
+    """
     if not request.files:
         abort(400)
 
     results = []
-
     for filename, handle in request.files.items():
-        temp = NamedTemporaryFile()
-        handle.save(temp)
-        result = model.transcribe(temp.name, language=HARPYIA_LANGUAGE, initial_prompt=HARPYIA_PROMPT)
-        results.append({
-            'filename': filename,
-            'transcript': handler_fn(result['text']),
-        })
+        # The context manager closes (and thereby deletes) the temp file on every path.
+        with NamedTemporaryFile() as temp:
+            handle.save(temp)
+            # prepare_file() reopens the file by name, so flush buffered bytes first.
+            temp.flush()
+            text = trans(prepare_file(temp.name))
+
+        # handler_fn was being ignored, which left /recognize_number unprocessed.
+        results.append({"filename": filename, "transcript": handler_fn(text)})
 
     print(results, file=sys.stderr)
-    return {'results': results}
+    return {"results": results}
 
-@app.route('/recognize', methods=['POST'])
-def recognize():
+
+initprompt = [  # NOTE(review): not referenced anywhere in this file — confirm it is still needed
+    "один",
+    "два",
+    "три",
+    "четыре",
+    "пять",
+    "шесть",
+    "семь",
+    "восемь",
+    "девять",
+    "десять",
+    "одиннадцать",
+    "двенадцать",
+    "тринадцать",
+    "сто",
+    "сот",
+]
+
+
+@elapsed_time
+def trans(audio, initial_prompt="семь сот сто"):
+    """Transcribe ``audio`` with the module-level model and return the joined text.
+
+    ``initial_prompt`` defaults to the prompt that used to be hard-coded here.
+    """
+    segments, _ = model.transcribe(
+        audio,
+        language=HARPYIA_LANGUAGE,
+        initial_prompt=initial_prompt,
+        condition_on_previous_text=False,
+        vad_filter=True,
+        beam_size=5,
+    )
+
+    # Iterate the segments directly; no throwaway list or append loop is needed.
+    return " ".join(segment.text for segment in segments)
+
+
+@elapsed_time
+def prepare_file(filename: str):
+    """Load ``filename`` at 16 kHz via whisper.load_audio and pad/trim the result."""
+    samples = whisper.load_audio(filename, sr=16000)
+    return whisper.pad_or_trim(samples)
+
+
+@app.route("/recognize", methods=["POST"])
+async def recognize():  # NOTE(review): async view around blocking recognize_files(); confirm async is intended
     return recognize_files(lambda text: text)
 
-@app.route('/recognize_number', methods=['POST'])
+
+@app.route("/recognize_number", methods=["POST"])  # passes transfer_and_clean as handler_fn
 def recognize_number():
     return recognize_files(transfer_and_clean)
 
+
 def transfer_and_clean(input_string):
-    number_mapping = {
-        "один": "1",
-        "два": "2",
-        "три": "3"
-    }
+    """Replace the Russian words for 1-3 with digits, then strip every non-digit."""
+    number_mapping = {"один": "1", "два": "2", "три": "3"}
+    # Whole words, any case: "Один" is converted, "одиннадцать" no longer yields a stray "1".
+    words = re.compile(r"\b(?:" + "|".join(number_mapping) + r")\b", re.IGNORECASE)
+    input_string = words.sub(lambda m: number_mapping[m.group().lower()], input_string)
 
-    for word, number in number_mapping.items():
-        input_string = input_string.replace(word, number)
-    
-    input_string = re.sub(r'[^\d]+', '', input_string)
-    
-    return input_string
-
+    input_string = re.sub(r"[^\d]+", "", input_string)
+    return input_string
diff --git a/src/test_utils.py b/src/test_utils.py
new file mode 100644
index 0000000..e204988
--- /dev/null
+++ b/src/test_utils.py
@@ -0,0 +1,39 @@
+import time
+import sys
+
+
+def elapsed_time_wrapper(unique_id: str = ""):
+    """Return a decorator that prints each call's duration to stderr.
+
+    A non-empty ``unique_id`` is prepended to the message as ``[unique_id]``.
+    """
+    from functools import wraps  # local import: the module's import block stays untouched
+    prefix = f"[{unique_id}] " if unique_id else ""
+
+    def decorator(func):
+        @wraps(func)  # keep the wrapped function's __name__ and __doc__
+        def wrapper(*args, **kwargs):
+            started = time.perf_counter()  # monotonic, unlike time.time()
+            result = func(*args, **kwargs)
+            elapsed = time.perf_counter() - started
+            print(f"{prefix}Executed in {elapsed} seconds", file=sys.stderr)
+            return result
+
+        return wrapper
+
+    return decorator
+
+
+def elapsed_time(func):
+    """Decorate ``func`` so that each call prints its duration to stderr."""
+    from functools import wraps  # local import: the module's import block stays untouched
+
+    @wraps(func)  # keep the wrapped function's __name__ and __doc__
+    def wrapper(*args, **kwargs):
+        started = time.perf_counter()  # monotonic, unlike time.time()
+        result = func(*args, **kwargs)
+        # stderr, like elapsed_time_wrapper, so timings never mix into stdout
+        print(f"Executed in {time.perf_counter() - started} seconds", file=sys.stderr)
+        return result
+
+    return wrapper