Compare commits
1 Commits
main
...
feature/ra
Author | SHA1 | Date | |
---|---|---|---|
e4fee65c38 |
@ -140,3 +140,8 @@ ENV/
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
|
||||
|
||||
#
|
||||
**/*.wav
|
||||
**/*.mp3
|
||||
|
23
Dockerfile
23
Dockerfile
@ -1,20 +1,23 @@
|
||||
FROM python:3.10-slim
|
||||
|
||||
ENV FLASK_APP=src/app.py
|
||||
ARG PIP_REQ_FILE=requirements.txt
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY requirements.txt /app
|
||||
RUN apt-get update && apt-get install git -y
|
||||
RUN pip3 install -r requirements.txt
|
||||
RUN pip3 install "git+https://github.com/openai/whisper.git"
|
||||
RUN apt-get install -y ffmpeg
|
||||
RUN apt update && apt install git ffmpeg -y && \
|
||||
pip3 install "git+https://github.com/openai/whisper.git"
|
||||
|
||||
RUN whisper --model medium --language ru dummy.wav; exit 0
|
||||
RUN whisper --model small --language ru dummy.wav; exit 0
|
||||
# Run the whisper CLI once per model size (presumably to cache the model
# weights in an image layer -- confirm). The commands are expected to fail
# (presumably because dummy.wav is absent -- confirm), so every failure is
# ignored with `|| true`. Do not put `exit 0` between the commands: it ends
# the shell immediately and the remaining commands never run.
RUN whisper --model medium --language ru dummy.wav || true; \
    whisper --model small --language ru dummy.wav || true
|
||||
|
||||
COPY . .
|
||||
COPY src/ src/
|
||||
|
||||
# Separate requirements installation to keep other dependencies
|
||||
# in cache
|
||||
COPY ${PIP_REQ_FILE} ${PIP_REQ_FILE}
|
||||
RUN pip3 install -r ${PIP_REQ_FILE}
|
||||
|
||||
EXPOSE 5000
|
||||
|
||||
ENV FLASK_APP=src/app.py
|
||||
|
||||
CMD [ "python3", "-m" , "flask", "run", "--host=0.0.0.0"]
|
||||
|
7
Makefile
Normal file
7
Makefile
Normal file
@ -0,0 +1,7 @@
|
||||
# Start the Flask development server for src/app.py on all interfaces.
#
# PYTORCH_CUDA_ALLOC_CONF must be assigned only once: with repeated
# assignments on one command line the shell keeps just the last one, so only
# the effective value (max_split_size_mb:128) is listed here. To use several
# allocator options, combine them in a single comma-separated value.
.PHONY: run
run:
	PYTORCH_NO_CUDA_MEMORY_CACHING=1 \
	CUDA_LAUNCH_BLOCKING=1 \
	FLASK_APP=src/app.py \
	PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128 \
	flask run --host=0.0.0.0
|
@ -1,8 +1,62 @@
|
||||
flask==3.0.2
|
||||
Jinja2==3.1.3
|
||||
asgiref==3.7.2
|
||||
av==11.0.0
|
||||
blinker==1.7.0
|
||||
Werkzeug==3.0.1
|
||||
certifi==2024.2.2
|
||||
charset-normalizer==3.3.2
|
||||
click==8.1.7
|
||||
coloredlogs==15.0.1
|
||||
ctranslate2==4.0.0
|
||||
Cython==3.0.8
|
||||
dtw-python==1.3.1
|
||||
faster-whisper==1.0.0
|
||||
filelock==3.13.1
|
||||
Flask==3.0.2
|
||||
flatbuffers==23.5.26
|
||||
fsspec==2024.2.0
|
||||
huggingface-hub==0.21.3
|
||||
humanfriendly==10.0
|
||||
idna==3.6
|
||||
itsdangerous==2.1.2
|
||||
Jinja2==3.1.3
|
||||
llvmlite==0.42.0
|
||||
MarkupSafe==2.1.5
|
||||
more-itertools==10.2.0
|
||||
mpmath==1.3.0
|
||||
networkx==3.2.1
|
||||
numba==0.59.0
|
||||
numpy==1.26.4
|
||||
nvidia-cublas-cu12==12.1.3.1
|
||||
nvidia-cuda-cupti-cu12==12.1.105
|
||||
nvidia-cuda-nvrtc-cu12==12.1.105
|
||||
nvidia-cuda-runtime-cu12==12.1.105
|
||||
nvidia-cudnn-cu12==8.9.2.26
|
||||
nvidia-cufft-cu12==11.0.2.54
|
||||
nvidia-curand-cu12==10.3.2.106
|
||||
nvidia-cusolver-cu12==11.4.5.107
|
||||
nvidia-cusparse-cu12==12.1.0.106
|
||||
nvidia-nccl-cu12==2.19.3
|
||||
nvidia-nvjitlink-cu12==12.3.101
|
||||
nvidia-nvtx-cu12==12.1.105
|
||||
onnxruntime==1.17.1
|
||||
openai-whisper @ git+https://github.com/openai/whisper.git@ba3f3cd54b0e5b8ce1ab3de13e32122d0d5f98ab
|
||||
packaging==23.2
|
||||
pillow==10.2.0
|
||||
protobuf==4.25.3
|
||||
python-dotenv==1.0.1
|
||||
PyYAML==6.0.1
|
||||
regex==2023.12.25
|
||||
requests==2.31.0
|
||||
scipy==1.12.0
|
||||
six==1.16.0
|
||||
sympy==1.12
|
||||
tiktoken==0.6.0
|
||||
tokenizers==0.15.2
|
||||
torch==2.2.1
|
||||
torchaudio==2.2.1
|
||||
torchvision==0.17.1
|
||||
tqdm==4.66.2
|
||||
triton==2.2.0
|
||||
typing_extensions==4.10.0
|
||||
urllib3==2.2.1
|
||||
Werkzeug==3.0.1
|
||||
whisper-timestamped==1.15.0
|
||||
|
112
src/app.py
112
src/app.py
@ -1,67 +1,131 @@
|
||||
from ctranslate2.extensions import asyncio
|
||||
from flask import Flask, abort, request
|
||||
from tempfile import NamedTemporaryFile
|
||||
from dotenv import load_dotenv
|
||||
import os
|
||||
from torch.functional import Tensor
|
||||
import whisper
|
||||
import torch
|
||||
import sys
|
||||
import re
|
||||
from faster_whisper import WhisperModel
|
||||
from test_utils import elapsed_time
|
||||
from whisper_timestamped import transcribe_timestamped
|
||||
from multiprocessing import Process
|
||||
|
||||
load_dotenv()
|
||||
|
||||
HARPYIA_PROMPT = os.getenv('HARPYIA_PROMPT') or 'спасите помогите на помощь пожар'
|
||||
HARPYIA_MODEL = os.getenv('HARPYIA_MODEL') or 'medium'
|
||||
HARPYIA_LANGUAGE = os.getenv('HARPYIA_LANGUAGE') or 'ru'
|
||||
model_size = "small"
|
||||
# tiny.en, tiny, base.en, base, small.en, small, medium.en, medium, large-v1, large-v2, large-v3, large, distil-large-v2, distil-medium.en, distil-small.en
|
||||
HARPYIA_PROMPT = os.getenv("HARPYIA_PROMPT") or "спасите помогите на помощь пожар"
|
||||
HARPYIA_MODEL = os.getenv("HARPYIA_MODEL") or "medium"
|
||||
HARPYIA_LANGUAGE = os.getenv("HARPYIA_LANGUAGE") or "ru"
|
||||
|
||||
# Check if NVIDIA GPU is available
|
||||
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
||||
DEVICE = "cpu"
|
||||
|
||||
# Load the Whisper model:
|
||||
model = whisper.load_model(HARPYIA_MODEL, device=DEVICE)
|
||||
model = WhisperModel(
|
||||
model_size,
|
||||
device=DEVICE,
|
||||
num_workers=6,
|
||||
cpu_threads=10,
|
||||
# in_memory=True,
|
||||
)
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
|
||||
@app.route("/")
def hello():
    """Return a plain-text hint describing which routes accept audio uploads."""
    usage_hint = "To recognize an audio file, upload it using a POST request with '/recognize' or '/recognize_number' route."
    return usage_hint
|
||||
|
||||
|
||||
def recognize_files(handler_fn):
|
||||
if not request.files:
|
||||
abort(400)
|
||||
|
||||
results = []
|
||||
|
||||
asyncio.get_running_loop()
|
||||
for filename, handle in request.files.items():
|
||||
temp = NamedTemporaryFile()
|
||||
handle.save(temp)
|
||||
result = model.transcribe(temp.name, language=HARPYIA_LANGUAGE, initial_prompt=HARPYIA_PROMPT)
|
||||
results.append({
|
||||
'filename': filename,
|
||||
'transcript': handler_fn(result['text']),
|
||||
})
|
||||
|
||||
audio = prepare_file(temp.name)
|
||||
res = trans(audio)
|
||||
|
||||
results.append(
|
||||
{
|
||||
"filename": filename,
|
||||
"transcript": res,
|
||||
}
|
||||
)
|
||||
|
||||
print(results, file=sys.stderr)
|
||||
return {'results': results}
|
||||
return {"results": results}
|
||||
|
||||
@app.route('/recognize', methods=['POST'])
|
||||
def recognize():
|
||||
|
||||
initprompt = [
|
||||
"один",
|
||||
"два",
|
||||
"три",
|
||||
"четыре",
|
||||
"пять",
|
||||
"шесть",
|
||||
"семь",
|
||||
"восемь",
|
||||
"девять",
|
||||
"десять",
|
||||
"одинадцать",
|
||||
"двенадцать",
|
||||
"тренадцать",
|
||||
"сто",
|
||||
"сот",
|
||||
]
|
||||
|
||||
|
||||
@elapsed_time
def trans(audio):
    """Transcribe *audio* with the module-level ``model`` and return the text.

    The model is called with the configured language, a fixed initial prompt,
    no conditioning on previous text, VAD filtering enabled and a beam size
    of 5. The second value returned by ``model.transcribe`` is ignored.

    Args:
        audio: Input passed straight to ``model.transcribe`` (the caller in
            this file passes the result of ``prepare_file``).

    Returns:
        The ``text`` of every returned segment, joined with single spaces.
    """
    transcribe_options = {
        "language": HARPYIA_LANGUAGE,
        "initial_prompt": "семь сот сто",
        "condition_on_previous_text": False,
        "vad_filter": True,
        "beam_size": 5,
    }
    segments, _ = model.transcribe(audio, **transcribe_options)

    # Consume the segments in order and glue their texts together.
    return " ".join(segment.text for segment in segments)
|
||||
|
||||
|
||||
@elapsed_time
def prepare_file(filename: str):
    """Load the audio file *filename* and normalise its length for the model.

    Args:
        filename: Path of the audio file to read.

    Returns:
        The result of ``whisper.pad_or_trim`` applied to the audio loaded
        with a sample rate of 16000.
    """
    # NOTE(review): pad_or_trim presumably fixes the length to Whisper's
    # standard window, so longer recordings would be cut off -- confirm that
    # this is intended.
    return whisper.pad_or_trim(whisper.load_audio(filename, sr=16000))
|
||||
|
||||
|
||||
@app.route("/recognize", methods=["POST"])
|
||||
async def recognize():
|
||||
return recognize_files(lambda text: text)
|
||||
|
||||
@app.route('/recognize_number', methods=['POST'])
|
||||
|
||||
@app.route("/recognize_number", methods=["POST"])
|
||||
def recognize_number():
|
||||
return recognize_files(transfer_and_clean)
|
||||
|
||||
|
||||
def transfer_and_clean(input_string):
|
||||
number_mapping = {
|
||||
"один": "1",
|
||||
"два": "2",
|
||||
"три": "3"
|
||||
}
|
||||
number_mapping = {"один": "1", "два": "2", "три": "3"}
|
||||
|
||||
for word, number in number_mapping.items():
|
||||
input_string = input_string.replace(word, number)
|
||||
|
||||
input_string = re.sub(r'[^\d]+', '', input_string)
|
||||
|
||||
return input_string
|
||||
|
||||
input_string = re.sub(r"[^\d]+", "", input_string)
|
||||
|
||||
return input_string
|
||||
|
39
src/test_utils.py
Normal file
39
src/test_utils.py
Normal file
@ -0,0 +1,39 @@
|
||||
import time
|
||||
import sys
|
||||
|
||||
|
||||
def elapsed_time_wrapper(unique_id: str = ""):
    """Build a decorator that reports how long each call of a function takes.

    Args:
        unique_id: Optional label. When it is not the empty string, every
            report is prefixed with ``[unique_id]``.

    Returns:
        A decorator. The decorated function returns whatever the original
        returns and writes one timing line to ``sys.stderr`` after each call
        that completes; if the original raises, nothing is reported and the
        exception propagates.
    """
    # Imported locally so the block does not rely on a module-level import.
    from functools import wraps

    prefix = f"[{unique_id}] " if unique_id != "" else ""

    def decorator(func):
        @wraps(func)  # keep the wrapped function's __name__, __doc__, etc.
        def wrapper(*args, **kwargs):
            # perf_counter is monotonic, so the measured interval cannot be
            # skewed by system clock adjustments the way time.time() can.
            start_time = time.perf_counter()
            result = func(*args, **kwargs)
            execution_time = time.perf_counter() - start_time
            print(
                f"{prefix}Executed in {execution_time} seconds",
                file=sys.stderr,
            )
            return result

        return wrapper

    return decorator
|
||||
|
||||
|
||||
def elapsed_time(func):
    """Decorate *func* so that every completed call prints its duration.

    Args:
        func: The callable to time.

    Returns:
        A wrapper with the same metadata as *func*. It forwards all
        arguments, returns the original result and prints
        ``Executed in <seconds> seconds`` to standard output. If *func*
        raises, nothing is printed and the exception propagates.
    """
    # Imported locally so the block does not rely on a module-level import.
    from functools import wraps

    @wraps(func)  # keep the wrapped function's __name__, __doc__, etc.
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic, so the measured interval cannot be
        # skewed by system clock adjustments the way time.time() can.
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        execution_time = time.perf_counter() - start_time
        print(f"Executed in {execution_time} seconds")
        return result

    return wrapper
|
Loading…
x
Reference in New Issue
Block a user