From daa2b218b2b540e31a6c2efbb36af16756d7231e Mon Sep 17 00:00:00 2001 From: Sviatoslav Tsariov Date: Fri, 21 Jun 2024 20:28:21 +0300 Subject: [PATCH] Initial commit --- .dockerignore | 142 +++++++++++++++++++++++++++ .gitignore | 163 +++++++++++++++++++++++++++++++ Dockerfile | 12 +++ docker-compose.yaml | 14 +++ requirements.txt | 15 +++ src/app.py | 201 +++++++++++++++++++++++++++++++++++++++ src/channel_connector.py | 50 ++++++++++ src/sound_localizer.py | 52 ++++++++++ src/wav_composer.py | 34 +++++++ src/wav_recorder.py | 40 ++++++++ 10 files changed, 723 insertions(+) create mode 100644 .dockerignore create mode 100644 .gitignore create mode 100644 Dockerfile create mode 100644 docker-compose.yaml create mode 100644 requirements.txt create mode 100644 src/app.py create mode 100644 src/channel_connector.py create mode 100644 src/sound_localizer.py create mode 100644 src/wav_composer.py create mode 100644 src/wav_recorder.py diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..763bdb6 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,142 @@ +.git +Dockerfile +.DS_Store +.gitignore +.dockerignore + +/credentials +/cache +/store + +/node_modules + +# https://github.com/github/gitignore/blob/master/Global/macOS.gitignore + +# General +*.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +# https://raw.githubusercontent.com/github/gitignore/master/Python.gitignore + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib64/ +parts/ +sdist/ 
+var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5640277 --- /dev/null +++ b/.gitignore @@ -0,0 +1,163 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +temp/ +recordings/ diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..e94666b --- /dev/null +++ b/Dockerfile @@ -0,0 +1,12 @@ +FROM python:3.10-slim + +WORKDIR /app + +COPY requirements.txt /app +RUN pip3 install -r requirements.txt + +COPY . . + +ENV FLASK_APP=src/app.py + +CMD [ "python3", "-m" , "flask", "run", "--host=0.0.0.0", "--port=8081"] diff --git a/docker-compose.yaml b/docker-compose.yaml new file mode 100644 index 0000000..f0fbf51 --- /dev/null +++ b/docker-compose.yaml @@ -0,0 +1,14 @@ +version: '2.1' +services: + rat: + container_name: rat + image: rat + build: . 
+ network_mode: host + volumes: + - "./src:/app/src" + - "./temp:/app/temp" + - "./recordings:/app/recordings" + environment: + CLIENT_URL: "http://192.168.0.119:5000" + HARPYIA_URL: "http://localhost:8080" diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..c180c79 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,15 @@ +blinker==1.7.0 +certifi==2024.2.2 +charset-normalizer==3.3.2 +click==8.1.7 +Flask==3.0.2 +idna==3.6 +itsdangerous==2.1.2 +Jinja2==3.1.3 +MarkupSafe==2.1.5 +numpy==2.0.0 +python-dotenv==1.0.1 +requests==2.31.0 +scipy==1.13.1 +urllib3==2.2.1 +Werkzeug==3.0.1 diff --git a/src/app.py b/src/app.py new file mode 100644 index 0000000..f109c2e --- /dev/null +++ b/src/app.py @@ -0,0 +1,201 @@ +from flask import Flask, abort, request, jsonify +from tempfile import NamedTemporaryFile +from datetime import datetime + +from dotenv import load_dotenv + +import os +import sys +import requests +import base64 +import io +import time +import glob +import traceback + +from datetime import datetime + +from wav_recorder import WavRecorder +from wav_composer import WavComposer +from channel_connector import ChannelConnector + +load_dotenv() + +CLIENT_URL = os.getenv('CLIENT_URL') or '' +HARPYIA_URL = os.getenv('HARPYIA_URL') or '' + +app = Flask(__name__) + +file_number = 1 +temp_directory = 'temp/' +recordings_directory = 'recordings/' + +def prepare_dirs(): + os.makedirs(os.path.dirname(temp_directory), exist_ok=True) + os.makedirs(os.path.dirname(recordings_directory), exist_ok=True) + files = glob.glob(temp_directory + "/*") + for f in files: + os.remove(f) + +prepare_dirs() + +wav_recorder = WavRecorder(recordings_directory) +wav_composer = WavComposer(16000) +channel_connector = ChannelConnector(16000) + +@app.route("/") +def hello(): + return "To recognize an audio file, upload it using a POST request with '/save_audio' route." 
@app.route('/aid/transcript', methods=['POST'])
def handler():
    """Transcribe one base64-encoded audio fragment and forward the result.

    Reads a JSON body containing ``audio.content`` (base64 audio bytes),
    ``audio.initialTime`` (epoch seconds) and ``audio.micros`` (microsecond
    offset added to ``initialTime``). The audio is wrapped into an in-memory
    WAV, posted to ``HARPYIA_URL + '/recognize'`` and the joined transcript is
    posted to ``CLIENT_URL + '/message'``.

    Returns:
        JSON ``{'message': transcript}`` on success, or ``{'error': ...}`` with
        status 500 when the recognizer answers with a non-200 status or an
        exception is raised while talking to either service.
    """
    data = request.json
    audio_content = data['audio']['content']

    # Blank the (large) payload before logging the request metadata.
    data['audio']['content'] = ''
    print(data, file=sys.stderr)

    initial_time = data['audio']['initialTime']
    micros = data['audio']['micros']

    # Decode base64 audio content
    audio_bytes = base64.b64decode(audio_content)

    # Build the WAV in memory. compose() is called with its two declared
    # arguments only; passing an extra ``channel`` keyword raised TypeError.
    buf = io.BytesIO()
    wav_composer.compose(buf, audio_bytes)
    buf.seek(0)

    try:
        requests.post(CLIENT_URL + '/state', json={'state': 0})

        response = requests.post(HARPYIA_URL + '/recognize', files={'file': buf})
        if response.status_code != 200:
            return jsonify({'error': 'Error occurred on Harpyia'}), 500

        results = response.json().get('results', [])

        # Same format as before: every transcript is prefixed with one space.
        transcript = ''.join(f' {entity.get("transcript")}' for entity in results)

        record_time = initial_time + int(micros / 1000000)
        record_time_str = datetime.fromtimestamp(record_time).strftime('%Y-%m-%d %H:%M:%S')
        record_millis_str = str(int(micros / 1000) % 1000).zfill(3)
        record_micros_str = str(micros % 1000).zfill(3)
        message = f'[{record_time_str}:{record_millis_str}:{record_micros_str}] {transcript}'
        print(message, file=sys.stderr)

        # The payload carries the nested results of the LAST entity, as before.
        # Guard the empty case, where the loop variable was never bound and the
        # old code raised NameError.
        last_results = results[-1].get('results') if results else None
        requests.post(CLIENT_URL + '/message', json={'message': message, 'results': last_results})

        return jsonify({'message': transcript})
    except Exception as e:
        return jsonify({'error': str(e)}), 500
@app.route("/recorder/client/start")
def start_recording():
    """Switch the shared recorder into recording mode.

    This is a GET route, so no request body is read. The previous version
    required a JSON body with a ``content`` key that it never used, which made
    an ordinary GET request fail.

    Returns:
        JSON ``{'status': 'success', ...}`` once the recorder is started, or
        ``{'status': 'error', 'message': ...}`` if starting raised.
    """
    try:
        wav_recorder.start_recording()
        return jsonify({'status': 'success', 'message': 'Recording started'})
    except Exception as e:
        return jsonify({'status': 'error', 'message': str(e)})
class ChannelConnector:
    """Pair up audio fragments from channels 0 and 1 and compare them.

    The first fragment received after a reset starts a one-second timer. When
    the timer fires, the most recent fragment of channel 0 and channel 1 (if
    both are present) are compared and the cache is cleared.

    Args:
        sample_rate: Passed to ``SoundLocalizer``.
        sample_width: Bytes per sample used for the RMS computation.
    """

    def __init__(self, sample_rate, sample_width=2):
        self.cached_audios = {}
        # Only used as an "a timer is pending" flag: None means idle.
        self.previous_time = None
        self.lock = threading.Lock()
        self.timer = None
        self.sample_width = sample_width
        self.sound_localizer = SoundLocalizer(sample_rate)

    def add_audio(self, audio_bytes, channel):
        """Cache ``audio_bytes`` for ``channel`` and arm the timer if idle."""
        with self.lock:
            self.cached_audios[channel] = audio_bytes

            if self.previous_time is None:
                self.previous_time = datetime.now()
                # Schedule the reset and comparison after 1 second
                self.timer = threading.Timer(1.0, self.reset_and_compare)
                self.timer.start()

    def reset_and_compare(self):
        """Compare the cached pair (when complete) and always reset the state.

        The reset runs in ``finally``: if ``compare`` raised, the old code left
        ``previous_time`` set, so ``add_audio`` never scheduled another timer.
        The exception itself still propagates to the caller.
        """
        with self.lock:
            try:
                if 0 in self.cached_audios and 1 in self.cached_audios:
                    self.compare(self.cached_audios[0], self.cached_audios[1])
            finally:
                self.reset()

    def reset(self):
        """Drop cached fragments and cancel any pending timer."""
        self.previous_time = None
        self.cached_audios = {}
        if self.timer:
            self.timer.cancel()
            self.timer = None

    @staticmethod
    def _rms(audio_bytes, sample_width):
        """Return the integer RMS of signed native-endian samples."""
        try:
            import audioop
        except ImportError:
            # audioop was removed from the standard library in Python 3.13
            # (PEP 594); fall back to a pure-Python equivalent.
            audioop = None
        if audioop is not None:
            return audioop.rms(audio_bytes, sample_width)

        import math

        if len(audio_bytes) % sample_width:
            raise ValueError('not a whole number of frames')
        count = len(audio_bytes) // sample_width
        if count == 0:
            return 0
        total = sum(
            int.from_bytes(audio_bytes[i:i + sample_width], sys.byteorder, signed=True) ** 2
            for i in range(0, len(audio_bytes), sample_width)
        )
        return int(math.sqrt(total / count))

    def compare(self, audio_left, audio_right):
        """Log which fragment is louder and the estimated source angle.

        Nothing is logged unless at least one RMS value exceeds 30.
        """
        left_rms = self._rms(audio_left, self.sample_width)
        right_rms = self._rms(audio_right, self.sample_width)

        if left_rms > 30 or right_rms > 30:
            print(f"Left channel RMS: {left_rms}", file=sys.stderr)
            print(f"Right channel RMS: {right_rms}", file=sys.stderr)
            print('left' if left_rms > right_rms else 'right', file=sys.stderr)

            angle = self.sound_localizer.estimate_source_angle(audio_left, audio_right)
            print(f'Approximate angle: {angle}', file=sys.stderr)
class WavComposer:
    """Wrap raw PCM bytes into a WAV container with fixed audio parameters.

    Args:
        sample_rate: Frame rate written to the WAV header.
        channels: Number of channels written to the WAV header (default 2).
        sample_width: Bytes per sample (default 2).
    """

    def __init__(self, sample_rate, channels=2, sample_width=2):
        self.sample_rate = sample_rate
        self.channels = channels
        self.sample_width = sample_width

    def compose(self, save_path, audio_bytes, channel=None, gain=40):
        """Amplify ``audio_bytes`` and write them as a WAV to ``save_path``.

        Args:
            save_path: File path or writable binary file object; a file object
                is left open so the caller can rewind and read it.
            audio_bytes: Raw signed samples in native byte order.
            channel: Accepted because an existing caller passes it; it does not
                affect the output.
            gain: Factor every sample is multiplied by (clipped to the sample
                range). Defaults to the previously hard-coded value 40.
        """
        # Amplify before opening the target so a malformed payload does not
        # leave an empty WAV file behind.
        amplified = self._amplify(audio_bytes, gain)

        with wave.open(save_path, 'wb') as wav_file:
            wav_file.setnchannels(self.channels)
            wav_file.setsampwidth(self.sample_width)
            wav_file.setframerate(self.sample_rate)
            wav_file.writeframesraw(amplified)

    def _amplify(self, audio_bytes, gain):
        """Multiply each sample by ``gain``, clipping to the sample range."""
        try:
            import audioop
        except ImportError:
            # audioop was removed from the standard library in Python 3.13
            # (PEP 594); fall back to a pure-Python equivalent.
            audioop = None
        if audioop is not None:
            return audioop.mul(audio_bytes, self.sample_width, gain)

        import math
        import sys

        width = self.sample_width
        if len(audio_bytes) % width:
            raise ValueError('not a whole number of frames')
        limit = 1 << (8 * width - 1)
        scaled = bytearray(len(audio_bytes))
        for offset in range(0, len(audio_bytes), width):
            sample = int.from_bytes(audio_bytes[offset:offset + width], sys.byteorder, signed=True)
            value = math.floor(max(-limit, min(limit - 1, sample * gain)))
            scaled[offset:offset + width] = value.to_bytes(width, sys.byteorder, signed=True)
        return bytes(scaled)

    def remove_initial_segment(self, audio_bytes, ms):
        """Return ``audio_bytes`` without its first ``ms`` milliseconds."""
        bytes_per_frame = self.sample_width * self.channels
        frames_to_remove = int((ms / 1000.0) * self.sample_rate)
        return audio_bytes[frames_to_remove * bytes_per_frame:]
class WavRecorder:
    """Buffer a header plus audio chunks in memory and write them out on demand.

    Args:
        output_directory: Directory used when ``save_recording`` is called
            without a buffer.
        sample_rate: Rate handed to the ``WavComposer`` that writes the
            recording. Defaults to 16000 — presumably the rate the rest of the
            application uses; confirm against the caller.
    """

    def __init__(self, output_directory, sample_rate=16000):
        self._header = b''
        self._data_chunks = []
        self._output_directory = output_directory
        self._sample_rate = sample_rate
        self._is_recording = False

    def start_recording(self):
        """Allow ``append_data`` to accept chunks."""
        self._is_recording = True

    def set_header(self, header):
        """Store the header that is prepended to the chunks when saving."""
        self._header = header

    def append_data(self, data):
        """Queue one chunk.

        Raises:
            RuntimeError: If ``start_recording`` has not been called.
        """
        if not self._is_recording:
            raise RuntimeError("Cannot append data. Recorder is not recording.")
        self._data_chunks.append(data)

    def save_recording(self, buf=None):
        """Write the recording to ``buf`` (or a new file) and clear the state.

        The state is only cleared when writing succeeded, so a failed save can
        be retried.
        """
        self._write_to_file(buf)
        self._reset_recorder()

    def _default_output_path(self):
        """Return a timestamped ``.wav`` path inside the output directory."""
        import os

        stamp = datetime.datetime.now().strftime('%d_%m_%Y__%H_%M_%S')
        return os.path.join(self._output_directory, f'recording_{stamp}.wav')

    def _write_to_file(self, buf=None):
        """Join header and chunks and pass them to a ``WavComposer``."""
        # NOTE(review): the stored header is passed through compose() together
        # with the samples, as the original code did — confirm this is intended.
        wav_data = self._header + b''.join(self._data_chunks)

        # ``is not None``: a file-like object must win even if it were falsy.
        # The old fallback path referenced an undefined ``filename``.
        output_path = buf if buf is not None else self._default_output_path()

        # compose() is an instance method; the old unbound call was missing
        # ``self`` and raised TypeError.
        WavComposer(self._sample_rate).compose(output_path, wav_data)

        print(f'Saved recording to {output_path}')

    def _reset_recorder(self):
        """Forget the header and chunks and leave recording mode."""
        self._header = b''
        self._data_chunks.clear()
        self._is_recording = False

    def is_recording(self):
        """Return True while the recorder accepts chunks."""
        return self._is_recording