Initial commit

This commit is contained in:
Sviatoslav Tsariov Yurievich 2024-06-21 20:28:21 +03:00
commit daa2b218b2
10 changed files with 723 additions and 0 deletions

142
.dockerignore Normal file
View File

@ -0,0 +1,142 @@
.git
Dockerfile
.DS_Store
.gitignore
.dockerignore
/credentials
/cache
/store
/node_modules
# https://github.com/github/gitignore/blob/master/Global/macOS.gitignore
# General
*.DS_Store
.AppleDouble
.LSOverride
# Icon must end with two \r
Icon
# Thumbnails
._*
# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent
# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk
# https://raw.githubusercontent.com/github/gitignore/master/Python.gitignore
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/

163
.gitignore vendored Normal file
View File

@ -0,0 +1,163 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
temp/
recordings/

12
Dockerfile Normal file
View File

@ -0,0 +1,12 @@
# Build the service image on top of the slim Python 3.10 base image.
FROM python:3.10-slim
WORKDIR /app
# The dependency list is copied and installed before the rest of the sources
# (presumably so the install layer can be cached across code-only changes).
COPY requirements.txt /app
RUN pip3 install -r requirements.txt
COPY . .
# Tell `flask run` which module holds the application.
ENV FLASK_APP=src/app.py
# Serve on all interfaces, port 8081.
CMD [ "python3", "-m" , "flask", "run", "--host=0.0.0.0", "--port=8081"]

14
docker-compose.yaml Normal file
View File

@ -0,0 +1,14 @@
# Compose definition for the single "rat" service built from the local Dockerfile.
version: '2.1'
services:
rat:
container_name: rat
image: rat
build: .
# The container uses the host's network stack; no ports are published here.
network_mode: host
volumes:
# Host directories mounted over the matching paths under /app.
- "./src:/app/src"
- "./temp:/app/temp"
- "./recordings:/app/recordings"
environment:
# Base URLs that src/app.py reads with os.getenv().
CLIENT_URL: "http://192.168.0.119:5000"
HARPYIA_URL: "http://localhost:8080"

15
requirements.txt Normal file
View File

@ -0,0 +1,15 @@
blinker==1.7.0
certifi==2024.2.2
charset-normalizer==3.3.2
click==8.1.7
Flask==3.0.2
idna==3.6
itsdangerous==2.1.2
Jinja2==3.1.3
MarkupSafe==2.1.5
numpy==2.0.0
python-dotenv==1.0.1
requests==2.31.0
scipy==1.13.1
urllib3==2.2.1
Werkzeug==3.0.1

201
src/app.py Normal file
View File

@ -0,0 +1,201 @@
from flask import Flask, abort, request, jsonify
from tempfile import NamedTemporaryFile
from datetime import datetime
from dotenv import load_dotenv
import os
import sys
import requests
import base64
import io
import time
import glob
import traceback
from datetime import datetime
from wav_recorder import WavRecorder
from wav_composer import WavComposer
from channel_connector import ChannelConnector
# Pull settings from a .env file (python-dotenv) before the environment is read.
load_dotenv()
# Base URLs of the two downstream services; an unset variable becomes ''.
CLIENT_URL = os.getenv('CLIENT_URL') or ''
HARPYIA_URL = os.getenv('HARPYIA_URL') or ''
app = Flask(__name__)
# Running counter that compose_save_path() embeds in each saved file name.
file_number = 1
# Relative working directories used for per-chunk WAV files and recordings.
temp_directory = 'temp/'
recordings_directory = 'recordings/'
def prepare_dirs():
    """Create the working directories and delete leftover files in the temp one.

    Both ``temp_directory`` and ``recordings_directory`` are created if they do
    not exist yet. Every file directly inside ``temp_directory`` is then
    removed; sub-directories are left untouched.
    """
    # Pass the configured paths straight to makedirs(): os.path.dirname() only
    # yields the directory itself when the path ends with a separator, and
    # returns '' (which makedirs rejects) for a path such as 'temp'.
    os.makedirs(temp_directory, exist_ok=True)
    os.makedirs(recordings_directory, exist_ok=True)

    for path in glob.glob(os.path.join(temp_directory, '*')):
        # os.remove() raises on directories, which would abort application
        # start-up; only files (and symlinks) are cleaned up.
        if os.path.isfile(path) or os.path.islink(path):
            os.remove(path)
# Runs at import time, before the shared helper objects below are created.
prepare_dirs()
# Module-level helpers shared by all request handlers.
wav_recorder = WavRecorder(recordings_directory)
# 16000 is the sample rate argument of both helpers.
wav_composer = WavComposer(16000)
channel_connector = ChannelConnector(16000)
@app.route("/")
def hello():
    """Return a short plain-text usage hint for the service root."""
    # Name the route that actually performs recognition in this application.
    return "To recognize an audio file, upload it using a POST request with '/aid/transcript' route."
@app.route("/time")
def get_time():
    """Report the server clock as a Unix timestamp truncated to whole seconds."""
    now_seconds = int(time.time())
    return dict(current_timestamp=now_seconds)
@app.route("/aid/ready", methods=['POST'])
def count():
    """Acknowledge a readiness notification with an empty JSON object (HTTP 200)."""
    # Forwarding the state to the client is currently switched off:
    # requests.post(CLIENT_URL + '/state', json={'state': 1})
    empty_body = {}
    return jsonify(empty_body), 200
@app.route('/aid/transcript', methods=['POST'])
def handler():
    """Transcribe one audio chunk through Harpyia and forward the text to the client.

    Expects a JSON body of the form
    ``{"audio": {"content": <base64>, "initialTime": <seconds>, "micros": <int>}}``.
    ``initialTime`` plus the whole seconds contained in ``micros`` is formatted
    as the timestamp that prefixes the forwarded message.

    Returns:
        ``{"message": <transcript>}`` on success, or ``{"error": ...}`` with
        HTTP 500 when Harpyia answers with a non-200 status or any step inside
        the forwarding block raises.
    """
    data = request.json
    audio = data['audio']
    audio_content = audio['content']
    # Blank the payload before logging so the base64 blob is not dumped to stderr.
    audio['content'] = ''
    print(data, file=sys.stderr)
    initial_time = audio['initialTime']
    micros = audio['micros']

    audio_bytes = base64.b64decode(audio_content)

    # Wrap the raw samples in an in-memory WAV file. WavComposer.compose()
    # accepts only (target, audio_bytes); an extra ``channel`` keyword makes it
    # raise TypeError on every request.
    buf = io.BytesIO()
    wav_composer.compose(buf, audio_bytes)
    buf.seek(0)

    try:
        # NOTE(review): these outbound calls have no timeout and block the
        # worker for as long as the peer keeps the connection open.
        requests.post(CLIENT_URL + '/state', json={'state': 0})
        response = requests.post(HARPYIA_URL + '/recognize', files={'file': buf})
        if response.status_code != 200:
            return jsonify({'error': 'Error occurred on Harpyia'}), 500

        results = response.json().get('results', [])
        # Every partial transcript is prefixed with a single space, exactly as
        # the incremental concatenation did.
        transcript = ''.join(f' {entity.get("transcript")}' for entity in results)

        record_time = initial_time + int(micros / 1000000)
        record_time_str = datetime.fromtimestamp(record_time).strftime('%Y-%m-%d %H:%M:%S')
        record_millis_str = str(int(micros / 1000) % 1000).zfill(3)
        record_micros_str = str(micros % 1000).zfill(3)
        message = f'[{record_time_str}:{record_millis_str}:{record_micros_str}] {transcript}'
        print(message, file=sys.stderr)

        # The nested results of the last entity are forwarded; an empty result
        # list yields None instead of failing on an unbound loop variable.
        last_results = results[-1].get('results') if results else None
        requests.post(CLIENT_URL + '/message', json={'message': message, 'results': last_results})
        return jsonify({'message': transcript})
    except Exception as e:
        # Keep the traceback in the log, as save_audio() does.
        print(traceback.format_exc(), file=sys.stderr)
        return jsonify({'error': str(e)}), 500
@app.route('/aid/save', methods=['POST'])
def save_audio():
    """Store one audio chunk as a WAV file and hand it to the channel connector.

    Reads ``content`` (base64), ``micros``, ``initialTime`` and ``channel``
    from the ``audio`` object of the JSON body. The chunk is written to a file
    whose name carries the recording time, then its first 100 ms are dropped
    and the remainder is cached per channel. Any failure is logged with its
    traceback and reported as ``{"status": "error", ...}``.
    """
    try:
        audio = request.json['audio']
        encoded_audio = audio['content']
        micros = audio['micros']
        initial_time = audio['initialTime']

        # Split the microsecond offset into whole seconds, millis and the rest.
        whole_seconds = int(micros / 1000000)
        millis_part = str(int(micros / 1000) % 1000).zfill(3)
        micros_part = str(micros % 1000).zfill(3)
        stamp = datetime.fromtimestamp(initial_time + whole_seconds).strftime("%d_%m_%Y__%H_%M_%S")
        recording_time = f'{stamp}_{millis_part}_{micros_part}'

        channel = audio['channel']
        target_path = compose_save_path(recording_time, channel)

        raw_audio = base64.b64decode(encoded_audio)
        wav_composer.compose(target_path, raw_audio)

        trimmed_audio = wav_composer.remove_initial_segment(raw_audio, 100)
        channel_connector.add_audio(trimmed_audio, channel)
        return jsonify({'status': 'success', 'message': 'Audio saved successfully'})
    except Exception as exc:
        print(traceback.format_exc(), file=sys.stderr)
        return jsonify({'status': 'error', 'message': str(exc)})
def compose_save_path(recording_time, channel):
    """Build the next temp-file path and advance the global file counter.

    The name combines the running ``file_number``, the channel and the
    formatted recording time; the chosen path is also logged to stderr.
    """
    global file_number
    name_parts = (
        f'saved_audio{file_number}',
        f'channel{channel}',
        f'{recording_time}.wav',
    )
    target = temp_directory + '__'.join(name_parts)
    print(f'The audio file will be saved to {target}', file=sys.stderr)
    file_number = file_number + 1
    return target
@app.route("/recorder/client/start")
def start_recording():
    """Switch the shared recorder into recording mode.

    The route is registered for GET only and needs no request body, so none is
    read: requiring a JSON body with a ``content`` key made every plain GET end
    in the error branch even though the value was never used.

    Returns:
        ``{"status": "success", ...}`` once recording is enabled, or
        ``{"status": "error", "message": ...}`` if the recorder raises.
    """
    try:
        wav_recorder.start_recording()
        return jsonify({'status': 'success', 'message': 'Recording started'})
    except Exception as e:
        return jsonify({'status': 'error', 'message': str(e)})
@app.route("/recorder/cheeze/check")
def check_recording():
    """Tell the caller whether the shared recorder is currently recording."""
    try:
        recording_now = wav_recorder.is_recording()
        return jsonify({'status': 'success', 'is_recording': recording_now})
    except Exception as exc:
        return jsonify({'status': 'error', 'message': str(exc)})
@app.route("/recorder/cheeze/send_header", methods=['POST'])
def receive_header():
    """Store the ``header`` field of the JSON body on the shared recorder."""
    try:
        # NOTE(review): a JSON value arrives as str, while WavRecorder
        # concatenates the header with bytes -- confirm the expected encoding.
        header_value = request.json['header']
        wav_recorder.set_header(header_value)
        return jsonify({'status': 'success', 'message': 'Header received'})
    except Exception as exc:
        return jsonify({'status': 'error', 'message': str(exc)})
@app.route("/recorder/cheeze/send_part", methods=['POST'])
def receive_part():
    """Append the ``content`` field of the JSON body to the current recording.

    The recorder rejects data while it is not recording; that error, like any
    other, is reported as ``{"status": "error", ...}``.
    """
    try:
        # NOTE(review): a JSON value arrives as str, while WavRecorder joins
        # the chunks as bytes -- confirm the expected encoding.
        part = request.json['content']
        wav_recorder.append_data(part)
        return jsonify({'status': 'success', 'message': 'Content part received'})
    except Exception as exc:
        return jsonify({'status': 'error', 'message': str(exc)})
@app.route("/recorder/client/stop")
def stop_recording():
    """Finish the current recording and return it base64-encoded.

    The route is registered for GET only and needs no request body, so none is
    read. The recording is written into an in-memory buffer whose bytes are
    returned as base64 text, because a ``BytesIO`` object (or raw bytes) cannot
    be serialized by ``jsonify``.

    Returns:
        ``{"status": "success", "content": <base64 str>}`` or
        ``{"status": "error", "message": ...}`` if saving fails.
    """
    try:
        buf = io.BytesIO()
        wav_recorder.save_recording(buf=buf)
        encoded = base64.b64encode(buf.getvalue()).decode('ascii')
        return jsonify({'status': 'success', 'content': encoded})
    except Exception as e:
        return jsonify({'status': 'error', 'message': str(e)})

50
src/channel_connector.py Normal file
View File

@ -0,0 +1,50 @@
import audioop
import sys
from datetime import datetime
import threading
from sound_localizer import SoundLocalizer
class ChannelConnector:
    """Cache the latest audio chunk per channel and compare a left/right pair.

    The first chunk stored after a reset starts a timer. When it fires, the
    chunks cached under channel ``0`` and channel ``1`` (if both are present)
    are compared by RMS level and passed to ``SoundLocalizer`` for an angle
    estimate. The cache is cleared in every case.
    """

    # At least one channel must exceed this RMS level before anything is reported.
    RMS_THRESHOLD = 30
    # Seconds between the first cached chunk and the comparison.
    COLLECT_SECONDS = 1.0

    def __init__(self, sample_rate, sample_width=2):
        """Set up an empty cache.

        Args:
            sample_rate: Sampling rate handed to ``SoundLocalizer``.
            sample_width: Bytes per sample, used for the RMS computation.
        """
        self.cached_audios = {}
        # Doubles as the "a timer is already pending" flag.
        self.previous_time = None
        self.lock = threading.Lock()
        self.timer = None
        self.sample_width = sample_width
        self.sound_localizer = SoundLocalizer(sample_rate)

    def add_audio(self, audio_bytes, channel):
        """Store ``audio_bytes`` for ``channel``, replacing any earlier chunk.

        The first chunk after a reset schedules ``reset_and_compare``.
        """
        with self.lock:
            self.cached_audios[channel] = audio_bytes

            if self.previous_time is None:
                self.previous_time = datetime.now()
                self.timer = threading.Timer(self.COLLECT_SECONDS, self.reset_and_compare)
                self.timer.start()

    def reset_and_compare(self):
        """Clear the cache and compare the chunks that were in it.

        The pair is taken and the state is reset while the lock is held; the
        comparison itself runs afterwards. This way an exception raised by
        ``compare`` can no longer leave ``previous_time`` set (which would stop
        any further timer from being scheduled), and ``add_audio`` is not
        blocked while the signals are analysed.
        """
        with self.lock:
            have_both = 0 in self.cached_audios and 1 in self.cached_audios
            pair = (self.cached_audios[0], self.cached_audios[1]) if have_both else None
            self.reset()

        if pair is not None:
            self.compare(*pair)

    def reset(self):
        """Drop cached chunks and cancel a pending timer.

        Does not take the lock itself; ``reset_and_compare`` calls it while
        holding the lock.
        """
        self.previous_time = None
        self.cached_audios = {}
        if self.timer:
            self.timer.cancel()
            self.timer = None

    def compare(self, audio_left, audio_right):
        """Log RMS levels, the louder side and the estimated source angle.

        Nothing is logged when both channels are at or below ``RMS_THRESHOLD``.
        """
        left_rms = audioop.rms(audio_left, self.sample_width)
        right_rms = audioop.rms(audio_right, self.sample_width)
        if left_rms > self.RMS_THRESHOLD or right_rms > self.RMS_THRESHOLD:
            print(f"Left channel RMS: {left_rms}", file=sys.stderr)
            print(f"Right channel RMS: {right_rms}", file=sys.stderr)
            print('left' if left_rms > right_rms else 'right', file=sys.stderr)

            angle = self.sound_localizer.estimate_source_angle(audio_left, audio_right)
            print(f'Approximate angle: {angle}', file=sys.stderr)

52
src/sound_localizer.py Normal file
View File

@ -0,0 +1,52 @@
import numpy as np
from scipy.signal import correlate
import math
import sys
class SoundLocalizer:
    """Estimate a sound source angle from two interleaved 16-bit recordings."""

    def __init__(self, sample_rate, channels=2):
        """
        Args:
            sample_rate: Frames per second of the recordings.
            channels: Number of interleaved channels in each recording.
        """
        self.sample_rate = sample_rate
        self.channels = channels

    def _to_frames(self, audio_bytes):
        """Return the samples as an int16 array of shape (frames, channels).

        A trailing partial frame is dropped so that the reshape cannot fail on
        a buffer whose length is not a whole number of frames.
        """
        frame_bytes = np.dtype(np.int16).itemsize * self.channels
        usable = len(audio_bytes) - len(audio_bytes) % frame_bytes
        samples = np.frombuffer(audio_bytes[:usable], dtype=np.int16)
        return samples.reshape(-1, self.channels)

    def _compute_tdoa(self, sig1, sig2, framerate):
        """Return the delay of ``sig1`` relative to ``sig2`` in seconds.

        The delay is the lag at which the full cross-correlation peaks.
        """
        # correlate() keeps the input dtype, so int16 samples would overflow
        # in the products and sums; work in float64 instead.
        sig1 = np.asarray(sig1, dtype=np.float64)
        sig2 = np.asarray(sig2, dtype=np.float64)
        correlation = correlate(sig1, sig2, mode='full')
        # In 'full' mode the lags run from -(len(sig2) - 1) to len(sig1) - 1,
        # so zero lag sits at index len(sig2) - 1.
        lag = np.argmax(correlation) - (len(sig2) - 1)
        return lag / framerate

    def _calculate_angle(self, tdoa, speed_of_sound, distance_between_mics, mic_angles):
        """Return the mean of ``mic_angles`` shifted by the TDOA-derived angle (degrees)."""
        normalized_tdoa = tdoa * speed_of_sound / distance_between_mics
        # Keep the asin() argument inside its domain.
        normalized_tdoa = np.clip(normalized_tdoa, -1, 1)
        tdoa_angle = math.asin(normalized_tdoa) * 180 / math.pi
        return np.mean([angle + tdoa_angle for angle in mic_angles])

    def estimate_source_angle(self, audio_left, audio_right,
                              speed_of_sound=343.0, distance_between_mics=0.1):
        """Estimate the source angle in degrees from two stereo recordings.

        Args:
            audio_left: Interleaved int16 bytes; its two channels are mic 1 and 2.
            audio_right: Interleaved int16 bytes; its two channels are mic 3 and 4.
            speed_of_sound: Speed of sound used to scale the delays.
            distance_between_mics: Microphone spacing used to scale the delays.

        Returns:
            The mean of the three angle estimates (mic 1 against mics 2, 3, 4).
        """
        frames_left = self._to_frames(audio_left)
        frames_right = self._to_frames(audio_right)

        mic1 = frames_left[:, 0]
        mic2 = frames_left[:, 1]
        mic3 = frames_right[:, 0]
        mic4 = frames_right[:, 1]

        tdoa_12 = self._compute_tdoa(mic1, mic2, self.sample_rate)
        tdoa_13 = self._compute_tdoa(mic1, mic3, self.sample_rate)
        tdoa_14 = self._compute_tdoa(mic1, mic4, self.sample_rate)
        print(tdoa_12, tdoa_13, tdoa_14, file=sys.stderr)

        # NOTE(review): the same spacing is applied to all three pairs and the
        # angle table is sliced per pair -- confirm against the array geometry.
        mic_angles = [-33, -11, 11, 33]
        angle_12 = self._calculate_angle(tdoa_12, speed_of_sound, distance_between_mics, mic_angles[:2])
        angle_13 = self._calculate_angle(tdoa_13, speed_of_sound, distance_between_mics, mic_angles[:3])
        angle_14 = self._calculate_angle(tdoa_14, speed_of_sound, distance_between_mics, mic_angles)

        return np.mean([angle_12, angle_13, angle_14])

34
src/wav_composer.py Normal file
View File

@ -0,0 +1,34 @@
import wave
import audioop
class WavComposer:
    """Write raw PCM bytes as an amplified WAV file."""

    def __init__(self, sample_rate, channels=2, sample_width=2, gain=40):
        """
        Args:
            sample_rate: Frames per second written to the WAV header.
            channels: Number of interleaved channels.
            sample_width: Bytes per sample.
            gain: Factor every sample is multiplied by before writing.
        """
        self.sample_rate = sample_rate
        self.channels = channels
        self.sample_width = sample_width
        self.gain = gain

    def compose(self, save_path, audio_bytes):
        """Amplify ``audio_bytes`` and write them as a WAV file.

        Args:
            save_path: File path or writable binary file object.
            audio_bytes: Raw PCM samples matching the configured layout.
        """
        # Amplify before opening the target so that invalid input does not
        # leave an empty WAV file behind, and use the configured sample width
        # rather than a fixed 2 bytes.
        amplified = audioop.mul(audio_bytes, self.sample_width, self.gain)

        with wave.open(save_path, 'wb') as wav_file:
            wav_file.setnchannels(self.channels)
            wav_file.setsampwidth(self.sample_width)
            wav_file.setframerate(self.sample_rate)
            wav_file.writeframesraw(amplified)

    def remove_initial_segment(self, audio_bytes, ms):
        """Return ``audio_bytes`` without its first ``ms`` milliseconds.

        Whole frames are removed; a non-positive ``ms`` removes nothing.
        """
        bytes_per_frame = self.sample_width * self.channels
        frames_to_remove = int((ms / 1000.0) * self.sample_rate)
        # A negative offset would otherwise slice from the end of the buffer.
        bytes_to_remove = max(0, frames_to_remove * bytes_per_frame)
        return audio_bytes[bytes_to_remove:]

40
src/wav_recorder.py Normal file
View File

@ -0,0 +1,40 @@
from wav_composer import WavComposer
import datetime
class WavRecorder:
    """Accumulate a header and data chunks and write them out through WavComposer."""

    def __init__(self, output_directory, sample_rate=16000):
        """
        Args:
            output_directory: Directory used when no buffer is given on save.
            sample_rate: Sample rate of the composer that writes the recording.
        """
        self._header = b''
        self._data_chunks = []
        self._output_directory = output_directory
        self._is_recording = False
        # compose() is an instance method, so the recorder owns a composer.
        self._composer = WavComposer(sample_rate)

    def start_recording(self):
        """Allow data chunks to be appended."""
        self._is_recording = True

    def set_header(self, header):
        """Store the header that is prepended to the data on save.

        NOTE(review): the header is concatenated with bytes on save, so it
        must be bytes -- confirm what the callers pass.
        """
        self._header = header

    def append_data(self, data):
        """Append one data chunk.

        Raises:
            RuntimeError: If recording has not been started.
        """
        if not self._is_recording:
            raise RuntimeError("Cannot append data. Recorder is not recording.")
        self._data_chunks.append(data)

    def save_recording(self, buf=None):
        """Write the recording to ``buf`` (or a new file) and reset the recorder."""
        self._write_to_file(buf)
        self._reset_recorder()

    def _write_to_file(self, buf=None):
        """Compose header plus chunks into ``buf`` or a timestamped file."""
        wav_data = self._header + b''.join(self._data_chunks)
        if buf is not None:
            output_path = buf
        else:
            # Name the file after the time of saving.
            filename = datetime.datetime.now().strftime('recording_%d_%m_%Y__%H_%M_%S.wav')
            output_path = f'{self._output_directory}/{filename}'
        # NOTE(review): the stored header is passed to the composer as part of
        # the sample data -- confirm that this is the intended format.
        self._composer.compose(output_path, wav_data)
        print(f'Saved recording to {output_path}')

    def _reset_recorder(self):
        """Discard header and chunks and leave recording mode."""
        self._header = b''
        self._data_chunks.clear()
        self._is_recording = False

    def is_recording(self):
        """Return True while data chunks are being accepted."""
        return self._is_recording