Compare commits

...

1 Commits

Author SHA1 Message Date
e4fee65c38 Works in 700ms 2024-03-01 13:57:11 +03:00
6 changed files with 209 additions and 37 deletions

View File

@ -140,3 +140,8 @@ ENV/
# mypy
.mypy_cache/
#
**/*.wav
**/*.mp3

View File

@ -1,20 +1,23 @@
FROM python:3.10-slim
ENV FLASK_APP=src/app.py
ARG PIP_REQ_FILE=requirements.txt
WORKDIR /app
COPY requirements.txt /app
RUN apt-get update && apt-get install git -y
RUN pip3 install -r requirements.txt
RUN pip3 install "git+https://github.com/openai/whisper.git"
RUN apt-get install -y ffmpeg
RUN apt update && apt install git ffmpeg -y && \
pip3 install "git+https://github.com/openai/whisper.git"
RUN whisper --model medium --language ru dummy.wav; exit 0
RUN whisper --model small --language ru dummy.wav; exit 0
# Warm the Whisper model cache at build time for the medium and small models.
# dummy.wav is presumably absent, so each call is expected to fail after the
# model has been fetched. The commands are chained with `;` and a single
# trailing `exit 0`: an `exit 0` placed before `&&` terminates the shell and
# the second download would never run.
RUN whisper --model medium --language ru dummy.wav; \
    whisper --model small --language ru dummy.wav; \
    exit 0
COPY . .
COPY src/ src/
# Separate requirements installation to keep other dependencies
# in cache
COPY ${PIP_REQ_FILE} ${PIP_REQ_FILE}
RUN pip3 install -r ${PIP_REQ_FILE}
EXPOSE 5000
ENV FLASK_APP=src/app.py
CMD [ "python3", "-m" , "flask", "run", "--host=0.0.0.0"]

7
Makefile Normal file
View File

@ -0,0 +1,7 @@
# Start the Flask development server on all interfaces.
# PYTORCH_CUDA_ALLOC_CONF takes one comma-separated option list; assigning the
# variable twice would keep only the last value, so both options are merged.
.PHONY: run
run:
	PYTORCH_NO_CUDA_MEMORY_CACHING=1 \
	PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True,max_split_size_mb:128 \
	CUDA_LAUNCH_BLOCKING=1 \
	FLASK_APP=src/app.py \
	flask run --host=0.0.0.0

View File

@ -1,8 +1,62 @@
flask==3.0.2
Jinja2==3.1.3
asgiref==3.7.2
av==11.0.0
blinker==1.7.0
Werkzeug==3.0.1
certifi==2024.2.2
charset-normalizer==3.3.2
click==8.1.7
coloredlogs==15.0.1
ctranslate2==4.0.0
Cython==3.0.8
dtw-python==1.3.1
faster-whisper==1.0.0
filelock==3.13.1
Flask==3.0.2
flatbuffers==23.5.26
fsspec==2024.2.0
huggingface-hub==0.21.3
humanfriendly==10.0
idna==3.6
itsdangerous==2.1.2
Jinja2==3.1.3
llvmlite==0.42.0
MarkupSafe==2.1.5
more-itertools==10.2.0
mpmath==1.3.0
networkx==3.2.1
numba==0.59.0
numpy==1.26.4
nvidia-cublas-cu12==12.1.3.1
nvidia-cuda-cupti-cu12==12.1.105
nvidia-cuda-nvrtc-cu12==12.1.105
nvidia-cuda-runtime-cu12==12.1.105
nvidia-cudnn-cu12==8.9.2.26
nvidia-cufft-cu12==11.0.2.54
nvidia-curand-cu12==10.3.2.106
nvidia-cusolver-cu12==11.4.5.107
nvidia-cusparse-cu12==12.1.0.106
nvidia-nccl-cu12==2.19.3
nvidia-nvjitlink-cu12==12.3.101
nvidia-nvtx-cu12==12.1.105
onnxruntime==1.17.1
openai-whisper @ git+https://github.com/openai/whisper.git@ba3f3cd54b0e5b8ce1ab3de13e32122d0d5f98ab
packaging==23.2
pillow==10.2.0
protobuf==4.25.3
python-dotenv==1.0.1
PyYAML==6.0.1
regex==2023.12.25
requests==2.31.0
scipy==1.12.0
six==1.16.0
sympy==1.12
tiktoken==0.6.0
tokenizers==0.15.2
torch==2.2.1
torchaudio==2.2.1
torchvision==0.17.1
tqdm==4.66.2
triton==2.2.0
typing_extensions==4.10.0
urllib3==2.2.1
Werkzeug==3.0.1
whisper-timestamped==1.15.0

View File

@ -1,67 +1,131 @@
from ctranslate2.extensions import asyncio
from flask import Flask, abort, request
from tempfile import NamedTemporaryFile
from dotenv import load_dotenv
import os
from torch.functional import Tensor
import whisper
import torch
import sys
import re
from faster_whisper import WhisperModel
from test_utils import elapsed_time
from whisper_timestamped import transcribe_timestamped
from multiprocessing import Process
# Module-level configuration and model construction.
# NOTE(review): this span was captured from a diff view, so several names are
# assigned twice (old and new form of the same line). Python keeps the last
# assignment, so only the later value of each name is in effect.
load_dotenv()
# Settings read from the environment, each with a hard-coded fallback.
HARPYIA_PROMPT = os.getenv('HARPYIA_PROMPT') or 'спасите помогите на помощь пожар'
HARPYIA_MODEL = os.getenv('HARPYIA_MODEL') or 'medium'
HARPYIA_LANGUAGE = os.getenv('HARPYIA_LANGUAGE') or 'ru'
# Size handed to WhisperModel below; note it is this literal, not
# HARPYIA_MODEL, that the WhisperModel call receives.
model_size = "small"
# Candidate sizes: tiny.en, tiny, base.en, base, small.en, small, medium.en, medium, large-v1, large-v2, large-v3, large, distil-large-v2, distil-medium.en, distil-small.en
# Same three settings again with double-quoted literals (reformatted copies).
HARPYIA_PROMPT = os.getenv("HARPYIA_PROMPT") or "спасите помогите на помощь пожар"
HARPYIA_MODEL = os.getenv("HARPYIA_MODEL") or "medium"
HARPYIA_LANGUAGE = os.getenv("HARPYIA_LANGUAGE") or "ru"
# Pick CUDA when torch reports an available GPU, otherwise the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# Unconditionally overrides the detection above, forcing CPU execution.
DEVICE = "cpu"
# Load the Whisper model. The first assignment uses the openai-whisper loader
# with HARPYIA_MODEL; it is immediately replaced by the WhisperModel instance.
model = whisper.load_model(HARPYIA_MODEL, device=DEVICE)
model = WhisperModel(
model_size,
device=DEVICE,
num_workers=6,
cpu_threads=10,
# in_memory=True,
)
# Flask application object that the route decorators below attach to.
app = Flask(__name__)
@app.route("/")
def hello():
    """Return a plain-text usage hint naming the two recognition routes."""
    usage_hint = "To recognize an audio file, upload it using a POST request with '/recognize' or '/recognize_number' route."
    return usage_hint
def recognize_files(handler_fn):
    """Transcribe every file uploaded with the current request.

    Args:
        handler_fn: Callable that receives each transcript string and returns
            the value stored under ``"transcript"`` in the response.

    Returns:
        A dict of the form
        ``{"results": [{"filename": ..., "transcript": ...}, ...]}``, with one
        entry per item in ``request.files``.

    Aborts the request with HTTP 400 when no files were uploaded.
    """
    if not request.files:
        abort(400)

    results = []
    for filename, handle in request.files.items():
        # The context manager closes (and thereby removes) the temporary file
        # as soon as the audio has been decoded.
        with NamedTemporaryFile() as temp:
            handle.save(temp)
            # The audio is re-read through the file name, so buffered bytes
            # must reach the disk first.
            temp.flush()
            audio = prepare_file(temp.name)
        transcript = trans(audio)
        results.append(
            {
                "filename": filename,
                # Post-process with the caller's handler so each route gets
                # the transcript shape it asked for.
                "transcript": handler_fn(transcript),
            }
        )

    print(results, file=sys.stderr)
    return {"results": results}
@app.route('/recognize', methods=['POST'])
def recognize():
# Russian numeral vocabulary intended as prompt material for the recogniser.
# NOTE(review): nothing in the visible code reads this list - confirm whether
# it should feed `initial_prompt` or be removed.
initprompt = [
    "один",
    "два",
    "три",
    "четыре",
    "пять",
    "шесть",
    "семь",
    "восемь",
    "девять",
    "десять",
    "одиннадцать",  # spelling corrected (double "н")
    "двенадцать",
    "тринадцать",  # spelling corrected ("три", not "тре")
    "сто",
    "сот",
]
@elapsed_time
def trans(audio):
    """Transcribe *audio* with the module-level model and return its text.

    The text of every returned segment is joined with single spaces.
    """
    decode_options = dict(
        language=HARPYIA_LANGUAGE,
        initial_prompt="семь сот сто",
        condition_on_previous_text=False,
        vad_filter=True,
        beam_size=5,
    )
    segments, _info = model.transcribe(audio, **decode_options)
    return " ".join(segment.text for segment in segments)
@elapsed_time
def prepare_file(filename: str):
    """Load the audio at *filename* with sr=16000 and pad or trim it via whisper."""
    return whisper.pad_or_trim(whisper.load_audio(filename, sr=16000))
@app.route("/recognize", methods=["POST"])
async def recognize():
    """Handle POST /recognize by calling recognize_files with an identity handler."""

    def passthrough(text):
        return text

    return recognize_files(passthrough)
@app.route('/recognize_number', methods=['POST'])
@app.route("/recognize_number", methods=["POST"])
def recognize_number():
    """Handle POST /recognize_number, passing transfer_and_clean as the handler."""
    response = recognize_files(transfer_and_clean)
    return response
def transfer_and_clean(input_string):
    """Turn spelled-out Russian numerals into digits and drop everything else.

    Args:
        input_string: Transcript text to convert.

    Returns:
        A string made only of digit characters: digits already present in the
        input plus "1", "2", "3" for each occurrence of "один", "два", "три".
    """
    number_mapping = {"один": "1", "два": "2", "три": "3"}
    # Lower-case first so a capitalised numeral (e.g. at the start of a
    # sentence) is still recognised; every non-digit is stripped afterwards,
    # so the case change cannot leak into the result.
    normalized = input_string.lower()
    for word, number in number_mapping.items():
        normalized = normalized.replace(word, number)
    return re.sub(r"[^\d]+", "", normalized)

39
src/test_utils.py Normal file
View File

@ -0,0 +1,39 @@
import time
import sys
def elapsed_time_wrapper(unique_id: str = ""):
    """Build a decorator that reports a call's duration on stderr.

    Args:
        unique_id: Optional label. When it is not the empty string the report
            is prefixed with ``[unique_id]``.

    Returns:
        A decorator. The function it produces forwards all arguments, returns
        the wrapped function's result unchanged, and keeps the wrapped
        function's name and docstring.
    """
    import functools  # local import: the module header does not import it

    prefix = "" if unique_id == "" else f"[{unique_id}] "

    def decorator(func):
        # Preserve __name__/__doc__ so logs and frameworks that key on the
        # function name still see the original function.
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # perf_counter is monotonic, unlike the wall clock.
            start_time = time.perf_counter()
            result = func(*args, **kwargs)
            execution_time = time.perf_counter() - start_time
            print(
                f"{prefix}Executed in {execution_time} seconds",
                file=sys.stderr,
            )
            return result

        return wrapper

    return decorator
def elapsed_time(func):
    """Decorate *func* so that each call prints its duration to stdout.

    Args:
        func: The callable to time.

    Returns:
        A wrapper that forwards all arguments, returns the result of *func*
        unchanged, and keeps its name and docstring.
    """
    import functools  # local import: the module header does not import it

    # Preserve __name__/__doc__ so logs and frameworks that key on the
    # function name still see the original function.
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic, unlike the wall clock.
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        execution_time = time.perf_counter() - start_time
        print(f"Executed in {execution_time} seconds")
        return result

    return wrapper