From e4fee65c38f69a05f8f6616b563b730bd7a6935a Mon Sep 17 00:00:00 2001
From: weissmall
Date: Fri, 1 Mar 2024 13:57:11 +0300
Subject: [PATCH] Works in 700ms

---
 .dockerignore     |  5 +++
 Dockerfile        | 23 +++++-----
 Makefile          |  7 +++
 requirements.txt  | 60 +++++++++++++++++++++++--
 src/app.py        | 112 ++++++++++++++++++++++++++++++++++++----------
 src/test_utils.py | 39 ++++++++++++++++
 6 files changed, 209 insertions(+), 37 deletions(-)
 create mode 100644 Makefile
 create mode 100644 src/test_utils.py

diff --git a/.dockerignore b/.dockerignore
index 763bdb6..ed29347 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -140,3 +140,8 @@ ENV/
 
 # mypy
 .mypy_cache/
+
+
+# Audio files
+**/*.wav
+**/*.mp3
diff --git a/Dockerfile b/Dockerfile
index 3b3403f..51c7c0b 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,20 +1,25 @@
 FROM python:3.10-slim
 
+ENV FLASK_APP=src/app.py
+ARG PIP_REQ_FILE=requirements.txt
+
 WORKDIR /app
 
-COPY requirements.txt /app
-RUN apt-get update && apt-get install git -y
-RUN pip3 install -r requirements.txt
-RUN pip3 install "git+https://github.com/openai/whisper.git"
-RUN apt-get install -y ffmpeg
+RUN apt-get update && apt-get install -y git ffmpeg && \
+    pip3 install "git+https://github.com/openai/whisper.git"
 
-RUN whisper --model medium --language ru dummy.wav; exit 0
-RUN whisper --model small --language ru dummy.wav; exit 0
+# Pre-download both models; dummy.wav does not exist, so ignore each
+# command's failure rather than exiting the shell after the first one
+RUN whisper --model medium --language ru dummy.wav || true; \
+    whisper --model small --language ru dummy.wav || true
 
-COPY . .
+# Separate requirements installation to keep other dependencies
+# in cache
+COPY ${PIP_REQ_FILE} ${PIP_REQ_FILE}
+RUN pip3 install -r ${PIP_REQ_FILE}
+
+COPY src/ src/
 
 EXPOSE 5000
 
-ENV FLASK_APP=src/app.py
-
 CMD [ "python3", "-m" , "flask", "run", "--host=0.0.0.0"]
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..8adfcdb
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,7 @@
+.PHONY: run
+run:
+	PYTORCH_NO_CUDA_MEMORY_CACHING=1 \
+	PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True,max_split_size_mb:128 \
+	CUDA_LAUNCH_BLOCKING=1 \
+	FLASK_APP=src/app.py \
+	flask run --host=0.0.0.0
diff --git a/requirements.txt b/requirements.txt
index a3e0ffb..b147807 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,8 +1,62 @@
-flask==3.0.2
-Jinja2==3.1.3
+asgiref==3.7.2
+av==11.0.0
 blinker==1.7.0
-Werkzeug==3.0.1
+certifi==2024.2.2
+charset-normalizer==3.3.2
 click==8.1.7
+coloredlogs==15.0.1
+ctranslate2==4.0.0
+Cython==3.0.8
+dtw-python==1.3.1
+faster-whisper==1.0.0
+filelock==3.13.1
+Flask==3.0.2
+flatbuffers==23.5.26
+fsspec==2024.2.0
+huggingface-hub==0.21.3
+humanfriendly==10.0
+idna==3.6
 itsdangerous==2.1.2
+Jinja2==3.1.3
+llvmlite==0.42.0
 MarkupSafe==2.1.5
+more-itertools==10.2.0
+mpmath==1.3.0
+networkx==3.2.1
+numba==0.59.0
+numpy==1.26.4
+nvidia-cublas-cu12==12.1.3.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-cuda-nvrtc-cu12==12.1.105
+nvidia-cuda-runtime-cu12==12.1.105
+nvidia-cudnn-cu12==8.9.2.26
+nvidia-cufft-cu12==11.0.2.54
+nvidia-curand-cu12==10.3.2.106
+nvidia-cusolver-cu12==11.4.5.107
+nvidia-cusparse-cu12==12.1.0.106
+nvidia-nccl-cu12==2.19.3
+nvidia-nvjitlink-cu12==12.3.101
+nvidia-nvtx-cu12==12.1.105
+onnxruntime==1.17.1
+openai-whisper @ git+https://github.com/openai/whisper.git@ba3f3cd54b0e5b8ce1ab3de13e32122d0d5f98ab
+packaging==23.2
+pillow==10.2.0
+protobuf==4.25.3
 python-dotenv==1.0.1
+PyYAML==6.0.1
+regex==2023.12.25
+requests==2.31.0
+scipy==1.12.0
+six==1.16.0
+sympy==1.12
+tiktoken==0.6.0
+tokenizers==0.15.2
+torch==2.2.1
+torchaudio==2.2.1
+torchvision==0.17.1
+tqdm==4.66.2
+triton==2.2.0
+typing_extensions==4.10.0
+urllib3==2.2.1
+Werkzeug==3.0.1
+whisper-timestamped==1.15.0
diff --git a/src/app.py b/src/app.py
index 25ac26a..3e1a856 100644
--- a/src/app.py
+++ b/src/app.py
@@ -1,67 +1,136 @@
+import asyncio
 from flask import Flask, abort, request
 from tempfile import NamedTemporaryFile
 from dotenv import load_dotenv
 import os
+from torch.functional import Tensor
 import whisper
 import torch
 import sys
 import re
+from faster_whisper import WhisperModel
+from test_utils import elapsed_time
+from whisper_timestamped import transcribe_timestamped
+from multiprocessing import Process
 
 load_dotenv()
-
-HARPYIA_PROMPT = os.getenv('HARPYIA_PROMPT') or 'спасите помогите на помощь пожар'
-HARPYIA_MODEL = os.getenv('HARPYIA_MODEL') or 'medium'
-HARPYIA_LANGUAGE = os.getenv('HARPYIA_LANGUAGE') or 'ru'
+# NOTE(review): the size is hard-coded; HARPYIA_MODEL no longer selects the model
+model_size = "small"
+# tiny.en, tiny, base.en, base, small.en, small, medium.en, medium, large-v1, large-v2, large-v3, large, distil-large-v2, distil-medium.en, distil-small.en
+HARPYIA_PROMPT = os.getenv("HARPYIA_PROMPT") or "спасите помогите на помощь пожар"
+HARPYIA_MODEL = os.getenv("HARPYIA_MODEL") or "medium"
+HARPYIA_LANGUAGE = os.getenv("HARPYIA_LANGUAGE") or "ru"
 
 # Check if NVIDIA GPU is available
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+# NOTE(review): this override forces CPU and makes the check above a no-op
+DEVICE = "cpu"
 
 # Load the Whisper model:
-model = whisper.load_model(HARPYIA_MODEL, device=DEVICE)
+model = WhisperModel(
+    model_size,
+    device=DEVICE,
+    num_workers=6,
+    cpu_threads=10,
+    # in_memory=True,
+)
 
 app = Flask(__name__)
 
+
 @app.route("/")
 def hello():
     return "To recognize an audio file, upload it using a POST request with '/recognize' or '/recognize_number' route."
 
+
 def recognize_files(handler_fn):
+    """Transcribe every uploaded file and post-process each transcript.
+
+    handler_fn receives the recognized text and returns the value stored
+    under "transcript". Aborts with HTTP 400 when no files were uploaded.
+    """
     if not request.files:
         abort(400)
 
     results = []
-    
     for filename, handle in request.files.items():
-        temp = NamedTemporaryFile()
-        handle.save(temp)
-        result = model.transcribe(temp.name, language=HARPYIA_LANGUAGE, initial_prompt=HARPYIA_PROMPT)
-        results.append({
-            'filename': filename,
-            'transcript': handler_fn(result['text']),
-        })
+        # The context manager closes (and thereby removes) the temporary file
+        with NamedTemporaryFile() as temp:
+            handle.save(temp)
+            # Flush so the loader, which reopens the file by name, sees all data
+            temp.flush()
+
+            audio = prepare_file(temp.name)
+            res = trans(audio)
+
+        results.append(
+            {
+                "filename": filename,
+                "transcript": handler_fn(res),
+            }
+        )
 
     print(results, file=sys.stderr)
 
-    return {'results': results}
+    return {"results": results}
 
-@app.route('/recognize', methods=['POST'])
-def recognize():
+
+initprompt = [
+    "один",
+    "два",
+    "три",
+    "четыре",
+    "пять",
+    "шесть",
+    "семь",
+    "восемь",
+    "девять",
+    "десять",
+    "одиннадцать",
+    "двенадцать",
+    "тринадцать",
+    "сто",
+    "сот",
+]
+
+
+@elapsed_time
+def trans(audio):
+    """Transcribe prepared audio and return the segment texts joined by spaces."""
+    segments, _ = model.transcribe(
+        audio,
+        language=HARPYIA_LANGUAGE,
+        initial_prompt="семь сот сто",
+        condition_on_previous_text=False,
+        vad_filter=True,
+        beam_size=5,
+    )
+
+    return " ".join(segment.text for segment in segments)
+
+
+@elapsed_time
+def prepare_file(filename: str):
+    audio = whisper.load_audio(filename, sr=16000)
+    audio = whisper.pad_or_trim(audio)
+    return audio
+
+
+@app.route("/recognize", methods=["POST"])
+async def recognize():
     return recognize_files(lambda text: text)
 
-@app.route('/recognize_number', methods=['POST'])
+
+@app.route("/recognize_number", methods=["POST"])
 def recognize_number():
     return recognize_files(transfer_and_clean)
 
+
 def transfer_and_clean(input_string):
-    number_mapping = {
-        "один": "1",
-        "два": "2",
-        "три": "3"
-    }
+    number_mapping = {"один": "1", "два": "2", "три": "3"}
 
     for word, number in number_mapping.items():
         input_string = input_string.replace(word, number)
-    
-    input_string = re.sub(r'[^\d]+', '', input_string)
-    
-    return input_string
+    input_string = re.sub(r"[^\d]+", "", input_string)
+
+    return input_string
diff --git a/src/test_utils.py b/src/test_utils.py
new file mode 100644
index 0000000..e204988
--- /dev/null
+++ b/src/test_utils.py
@@ -0,0 +1,33 @@
+import functools
+import sys
+import time
+
+
+def elapsed_time_wrapper(unique_id: str = ""):
+    """Build a decorator that reports a function's run time on stderr.
+
+    When unique_id is non-empty it is printed in brackets before the message.
+    """
+    prefix = f"[{unique_id}] " if unique_id else ""
+
+    def decorator(func):
+        @functools.wraps(func)
+        def wrapper(*args, **kwargs):
+            # perf_counter is monotonic, so the interval cannot go negative
+            start_time = time.perf_counter()
+            result = func(*args, **kwargs)
+            execution_time = time.perf_counter() - start_time
+            print(
+                f"{prefix}Executed in {execution_time} seconds",
+                file=sys.stderr,
+            )
+            return result
+
+        return wrapper
+
+    return decorator
+
+
+def elapsed_time(func):
+    """Decorate func so every call reports its run time on stderr."""
+    return elapsed_time_wrapper()(func)