Compare commits: main...feature/ra (1 commit)

Commit: e4fee65c38
.gitignore
@@ -140,3 +140,8 @@ ENV/
 
 # mypy
 .mypy_cache/
+
+
+#
+**/*.wav
+**/*.mp3
23 Dockerfile
@@ -1,20 +1,23 @@
 FROM python:3.10-slim
 
+ENV FLASK_APP=src/app.py
+ARG PIP_REQ_FILE=requirements.txt
+
 WORKDIR /app
 
-COPY requirements.txt /app
-RUN apt-get update && apt-get install git -y
-RUN pip3 install -r requirements.txt
-RUN pip3 install "git+https://github.com/openai/whisper.git"
-RUN apt-get install -y ffmpeg
+RUN apt update && apt install git ffmpeg -y && \
+    pip3 install "git+https://github.com/openai/whisper.git"
 
-RUN whisper --model medium --language ru dummy.wav; exit 0
-RUN whisper --model small --language ru dummy.wav; exit 0
+RUN whisper --model medium --language ru dummy.wav; exit 0 && \
+    whisper --model small --language ru dummy.wav; exit 0
 
-COPY . .
+COPY src/ src/
+
+# Separate requirements installation to keep other dependencies
+# in cache
+COPY ${PIP_REQ_FILE} ${PIP_REQ_FILE}
+RUN pip3 install -r ${PIP_REQ_FILE}
 
 EXPOSE 5000
 
-ENV FLASK_APP=src/app.py
-
 CMD [ "python3", "-m" , "flask", "run", "--host=0.0.0.0"]
7 Makefile (new file)
@@ -0,0 +1,7 @@
+run:
+	PYTORCH_NO_CUDA_MEMORY_CACHING=1 \
+	PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True \
+	CUDA_LAUNCH_BLOCKING=1 \
+	FLASK_APP=src/app.py \
+	PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128 \
+	flask run --host=0.0.0.0
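Note: the recipe above assigns PYTORCH_CUDA_ALLOC_CONF twice; since these are plain shell VAR=value prefixes on a single command, the later max_split_size_mb:128 value is what the flask process actually sees. A minimal Python sketch of the same environment setup (illustrative only, not part of the change; variable names come from the Makefile, and allocator settings only take effect if set before torch initializes CUDA):

import os

# Same variables as the Makefile recipe. In the Makefile the second
# PYTORCH_CUDA_ALLOC_CONF assignment overrides the first.
os.environ["PYTORCH_NO_CUDA_MEMORY_CACHING"] = "1"
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
os.environ["FLASK_APP"] = "src/app.py"
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"

import torch  # import torch only after the allocator is configured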
requirements.txt
@@ -1,8 +1,62 @@
-flask==3.0.2
-Jinja2==3.1.3
+asgiref==3.7.2
+av==11.0.0
 blinker==1.7.0
-Werkzeug==3.0.1
+certifi==2024.2.2
+charset-normalizer==3.3.2
 click==8.1.7
+coloredlogs==15.0.1
+ctranslate2==4.0.0
+Cython==3.0.8
+dtw-python==1.3.1
+faster-whisper==1.0.0
+filelock==3.13.1
+Flask==3.0.2
+flatbuffers==23.5.26
+fsspec==2024.2.0
+huggingface-hub==0.21.3
+humanfriendly==10.0
+idna==3.6
 itsdangerous==2.1.2
+Jinja2==3.1.3
+llvmlite==0.42.0
 MarkupSafe==2.1.5
+more-itertools==10.2.0
+mpmath==1.3.0
+networkx==3.2.1
+numba==0.59.0
+numpy==1.26.4
+nvidia-cublas-cu12==12.1.3.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-cuda-nvrtc-cu12==12.1.105
+nvidia-cuda-runtime-cu12==12.1.105
+nvidia-cudnn-cu12==8.9.2.26
+nvidia-cufft-cu12==11.0.2.54
+nvidia-curand-cu12==10.3.2.106
+nvidia-cusolver-cu12==11.4.5.107
+nvidia-cusparse-cu12==12.1.0.106
+nvidia-nccl-cu12==2.19.3
+nvidia-nvjitlink-cu12==12.3.101
+nvidia-nvtx-cu12==12.1.105
+onnxruntime==1.17.1
+openai-whisper @ git+https://github.com/openai/whisper.git@ba3f3cd54b0e5b8ce1ab3de13e32122d0d5f98ab
+packaging==23.2
+pillow==10.2.0
+protobuf==4.25.3
 python-dotenv==1.0.1
+PyYAML==6.0.1
+regex==2023.12.25
+requests==2.31.0
+scipy==1.12.0
+six==1.16.0
+sympy==1.12
+tiktoken==0.6.0
+tokenizers==0.15.2
+torch==2.2.1
+torchaudio==2.2.1
+torchvision==0.17.1
+tqdm==4.66.2
+triton==2.2.0
+typing_extensions==4.10.0
+urllib3==2.2.1
+Werkzeug==3.0.1
+whisper-timestamped==1.15.0
108 src/app.py
@@ -1,67 +1,131 @@
+from ctranslate2.extensions import asyncio
 from flask import Flask, abort, request
 from tempfile import NamedTemporaryFile
 from dotenv import load_dotenv
 import os
+from torch.functional import Tensor
 import whisper
 import torch
 import sys
 import re
+from faster_whisper import WhisperModel
+from test_utils import elapsed_time
+from whisper_timestamped import transcribe_timestamped
+from multiprocessing import Process
 
 load_dotenv()
 
-HARPYIA_PROMPT = os.getenv('HARPYIA_PROMPT') or 'спасите помогите на помощь пожар'
-HARPYIA_MODEL = os.getenv('HARPYIA_MODEL') or 'medium'
-HARPYIA_LANGUAGE = os.getenv('HARPYIA_LANGUAGE') or 'ru'
+model_size = "small"
+# tiny.en, tiny, base.en, base, small.en, small, medium.en, medium, large-v1, large-v2, large-v3, large, distil-large-v2, distil-medium.en, distil-small.en
+HARPYIA_PROMPT = os.getenv("HARPYIA_PROMPT") or "спасите помогите на помощь пожар"
+HARPYIA_MODEL = os.getenv("HARPYIA_MODEL") or "medium"
+HARPYIA_LANGUAGE = os.getenv("HARPYIA_LANGUAGE") or "ru"
 
 # Check if NVIDIA GPU is available
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+DEVICE = "cpu"
 
 # Load the Whisper model:
-model = whisper.load_model(HARPYIA_MODEL, device=DEVICE)
+model = WhisperModel(
+    model_size,
+    device=DEVICE,
+    num_workers=6,
+    cpu_threads=10,
+    # in_memory=True,
+)
 
 app = Flask(__name__)
 
+
 @app.route("/")
 def hello():
     return "To recognize an audio file, upload it using a POST request with '/recognize' or '/recognize_number' route."
 
+
 def recognize_files(handler_fn):
     if not request.files:
         abort(400)
 
     results = []
+    asyncio.get_running_loop()
     for filename, handle in request.files.items():
         temp = NamedTemporaryFile()
         handle.save(temp)
-        result = model.transcribe(temp.name, language=HARPYIA_LANGUAGE, initial_prompt=HARPYIA_PROMPT)
-        results.append({
-            'filename': filename,
-            'transcript': handler_fn(result['text']),
-        })
+
+        audio = prepare_file(temp.name)
+        res = trans(audio)
+
+        results.append(
+            {
+                "filename": filename,
+                "transcript": res,
+            }
+        )
 
     print(results, file=sys.stderr)
-    return {'results': results}
+    return {"results": results}
 
-@app.route('/recognize', methods=['POST'])
-def recognize():
+
+initprompt = [
+    "один",
+    "два",
+    "три",
+    "четыре",
+    "пять",
+    "шесть",
+    "семь",
+    "восемь",
+    "девять",
+    "десять",
+    "одинадцать",
+    "двенадцать",
+    "тренадцать",
+    "сто",
+    "сот",
+]
+
+
+@elapsed_time
+def trans(audio):
+
+    segments, _ = model.transcribe(
+        audio,
+        language=HARPYIA_LANGUAGE,
+        initial_prompt="семь сот сто",
+        condition_on_previous_text=False,
+        vad_filter=True,
+        beam_size=5,
+    )
+
+    words = []
+    for e in list(segments):
+        words.append(e.text)
+
+    return " ".join(words)
+
+
+@elapsed_time
+def prepare_file(filename: str):
+    audio = whisper.load_audio(filename, sr=16000)
+    audio = whisper.pad_or_trim(audio)
+    return audio
+
+
+@app.route("/recognize", methods=["POST"])
+async def recognize():
     return recognize_files(lambda text: text)
 
-@app.route('/recognize_number', methods=['POST'])
+
+@app.route("/recognize_number", methods=["POST"])
 def recognize_number():
     return recognize_files(transfer_and_clean)
 
+
 def transfer_and_clean(input_string):
-    number_mapping = {
-        "один": "1",
-        "два": "2",
-        "три": "3"
-    }
+    number_mapping = {"один": "1", "два": "2", "три": "3"}
 
     for word, number in number_mapping.items():
         input_string = input_string.replace(word, number)
 
-    input_string = re.sub(r'[^\d]+', '', input_string)
+    input_string = re.sub(r"[^\d]+", "", input_string)
 
     return input_string
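For a quick manual check of the two routes, a small client along these lines should work. This is a sketch, not part of the change: it assumes the server is running on localhost:5000 and that sample.wav is a file you provide; requests itself is already pinned in requirements.txt.

import requests

# recognize_files() iterates request.files, so the multipart field name
# becomes the "filename" key in the JSON response.
with open("sample.wav", "rb") as f:
    resp = requests.post(
        "http://localhost:5000/recognize",
        files={"sample.wav": f},
    )
resp.raise_for_status()
for item in resp.json()["results"]:
    print(item["filename"], "->", item["transcript"])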
39 src/test_utils.py (new file)
@@ -0,0 +1,39 @@
+import time
+import sys
+
+
+def elapsed_time_wrapper(unique_id: str = ""):
+
+    def decorator(func):
+        def wrapper(*args, **kwargs):
+            start_time = time.time()
+            result = func(*args, **kwargs)
+            end_time = time.time()
+            execution_time = end_time - start_time
+            if not unique_id == "":
+                print(
+                    f"[{unique_id}] Executed in {execution_time} seconds",
+                    file=sys.stderr,
+                )
+            else:
+                print(f"Executed in {execution_time} seconds", file=sys.stderr)
+            return result
+
+        return wrapper
+
+    return decorator
+
+
+def elapsed_time(func):
+    def wrapper(*args, **kwargs):
+        start_time = time.time()
+        result = func(*args, **kwargs)
+        end_time = time.time()
+        execution_time = end_time - start_time
+        print(
+            f"Executed in {execution_time} seconds",
+            sep="\n",
+        )
+        return result
+
+    return wrapper
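Usage sketch for the timing helpers above (function names here are illustrative): both decorators time the wrapped call and pass its return value through; elapsed_time_wrapper additionally tags the report with an identifier.

from test_utils import elapsed_time, elapsed_time_wrapper

@elapsed_time
def slow_add(a, b):
    return a + b

@elapsed_time_wrapper("mul")
def slow_mul(a, b):
    return a * b

print(slow_add(2, 3))  # prints "Executed in ... seconds", then 5
print(slow_mul(2, 3))  # prints "[mul] Executed in ... seconds" to stderr, then 6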