From daa2b218b2b540e31a6c2efbb36af16756d7231e Mon Sep 17 00:00:00 2001
From: Sviatoslav Tsariov <slava982007@gmail.com>
Date: Fri, 21 Jun 2024 20:28:21 +0300
Subject: [PATCH] Initial commit

---
 .dockerignore            | 142 +++++++++++++++++++++++++++
 .gitignore               | 163 +++++++++++++++++++++++++++++++
 Dockerfile               |  12 +++
 docker-compose.yaml      |  14 +++
 requirements.txt         |  15 +++
 src/app.py               | 201 +++++++++++++++++++++++++++++++++++++++
 src/channel_connector.py |  50 ++++++++++
 src/sound_localizer.py   |  52 ++++++++++
 src/wav_composer.py      |  34 +++++++
 src/wav_recorder.py      |  40 ++++++++
 10 files changed, 723 insertions(+)
 create mode 100644 .dockerignore
 create mode 100644 .gitignore
 create mode 100644 Dockerfile
 create mode 100644 docker-compose.yaml
 create mode 100644 requirements.txt
 create mode 100644 src/app.py
 create mode 100644 src/channel_connector.py
 create mode 100644 src/sound_localizer.py
 create mode 100644 src/wav_composer.py
 create mode 100644 src/wav_recorder.py

diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..763bdb6
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,142 @@
+.git
+Dockerfile
+.DS_Store
+.gitignore
+.dockerignore
+
+/credentials
+/cache
+/store
+
+/node_modules
+
+# https://github.com/github/gitignore/blob/master/Global/macOS.gitignore
+
+# General
+*.DS_Store
+.AppleDouble
+.LSOverride
+
+# Icon must end with two \r
+Icon
+
+
+# Thumbnails
+._*
+
+# Files that might appear in the root of a volume
+.DocumentRevisions-V100
+.fseventsd
+.Spotlight-V100
+.TemporaryItems
+.Trashes
+.VolumeIcon.icns
+.com.apple.timemachine.donotpresent
+
+# Directories potentially created on remote AFP share
+.AppleDB
+.AppleDesktop
+Network Trash Folder
+Temporary Items
+.apdisk
+
+# https://raw.githubusercontent.com/github/gitignore/master/Python.gitignore
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..5640277
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,163 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+temp/
+recordings/
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..e94666b
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,12 @@
+FROM python:3.10-slim
+
+WORKDIR /app
+
+COPY requirements.txt /app
+RUN pip3 install -r requirements.txt
+
+COPY . .
+
+ENV FLASK_APP=src/app.py
+
+CMD [ "python3", "-m" , "flask", "run", "--host=0.0.0.0", "--port=8081"]
diff --git a/docker-compose.yaml b/docker-compose.yaml
new file mode 100644
index 0000000..f0fbf51
--- /dev/null
+++ b/docker-compose.yaml
@@ -0,0 +1,14 @@
+version: '2.1'
+services:
+  rat:
+    container_name: rat
+    image: rat
+    build: .
+    network_mode: host
+    volumes:
+      - "./src:/app/src"
+      - "./temp:/app/temp"
+      - "./recordings:/app/recordings"
+    environment:
+      CLIENT_URL: "http://192.168.0.119:5000"
+      HARPYIA_URL: "http://localhost:8080"
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..c180c79
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,15 @@
+blinker==1.7.0
+certifi==2024.2.2
+charset-normalizer==3.3.2
+click==8.1.7
+Flask==3.0.2
+idna==3.6
+itsdangerous==2.1.2
+Jinja2==3.1.3
+MarkupSafe==2.1.5
+numpy==2.0.0
+python-dotenv==1.0.1
+requests==2.31.0
+scipy==1.13.1
+urllib3==2.2.1
+Werkzeug==3.0.1
diff --git a/src/app.py b/src/app.py
new file mode 100644
index 0000000..f109c2e
--- /dev/null
+++ b/src/app.py
@@ -0,0 +1,201 @@
+from flask import Flask, abort, request, jsonify
+from tempfile import NamedTemporaryFile
+from datetime import datetime
+
+from dotenv import load_dotenv
+
+import os
+import sys
+import requests
+import base64
+import io
+import time
+import glob
+import traceback
+
+from datetime import datetime
+
+from wav_recorder import WavRecorder
+from wav_composer import WavComposer
+from channel_connector import ChannelConnector
+
+load_dotenv()
+# Base URLs of the services this app posts to, read from the environment ('' when unset).
+CLIENT_URL = os.getenv('CLIENT_URL') or ''
+HARPYIA_URL = os.getenv('HARPYIA_URL') or ''
+
+app = Flask(__name__)
+# Counter embedded in temp file names, and the output directories (note the trailing '/').
+file_number = 1
+temp_directory = 'temp/'
+recordings_directory = 'recordings/'
+
+def prepare_dirs():
+    """Create the temp and recordings directories, then delete leftover temp files."""
+    for directory in (temp_directory, recordings_directory):
+        os.makedirs(os.path.dirname(directory), exist_ok=True)
+    for stale_file in glob.glob(temp_directory + "/*"):
+        os.remove(stale_file)
+
+prepare_dirs()
+# Helpers shared by all routes; composer and connector are built for a sample rate of 16000.
+wav_recorder = WavRecorder(recordings_directory)
+wav_composer = WavComposer(16000)
+channel_connector = ChannelConnector(16000)
+
+@app.route("/")
+def hello():  # Landing page: names the route that audio should be POSTed to.
+    return "To recognize an audio file, upload it using a POST request with '/aid/transcript' route."
+
+@app.route("/time")
+def get_time():
+    """Return the server's current Unix time, truncated to whole seconds."""
+    return {"current_timestamp": int(time.time())}
+
+@app.route("/aid/ready", methods=['POST'])
+def count():  # Acknowledge a POST to /aid/ready with an empty JSON object and status 200.
+    # requests.post(CLIENT_URL + '/state', json={'state': 1})
+    return jsonify({}), 200
+
+@app.route('/aid/transcript', methods=['POST'])
+def handler():
+    """Transcribe a posted audio chunk via Harpyia and forward the text to the client.
+
+    Expects JSON with ``audio.content`` (base64-encoded raw frames),
+    ``audio.initialTime`` (epoch seconds) and ``audio.micros`` (microsecond offset).
+    Returns ``{'message': transcript}``, or ``{'error': ...}`` with status 500.
+    """
+    data = request.json
+    audio_content = data['audio']['content']
+
+    # Blank the payload before logging so the base64 blob is not dumped to stderr.
+    data['audio']['content'] = ''
+    print(data, file=sys.stderr)
+
+    initial_time = data['audio']['initialTime']
+    micros = data['audio']['micros']
+
+    # Wrap the decoded audio in an in-memory WAV file for the upload below;
+    # compose() is given just the target buffer and the raw frames.
+    buf = io.BytesIO()
+    wav_composer.compose(buf, base64.b64decode(audio_content))
+    buf.seek(0)
+
+    try:
+        requests.post(CLIENT_URL + '/state', json={'state': 0})
+        response = requests.post(HARPYIA_URL + '/recognize', files={'file': buf})
+        if response.status_code != 200:
+            return jsonify({'error': 'Error occurred on Harpyia'}), 500
+
+        transcript = ''
+        last_results = None  # stays None when Harpyia returns no entities
+        for entity in response.json().get('results', []):
+            transcript = f'{transcript} {entity.get("transcript")}'
+            last_results = entity.get('results')
+
+        record_time = initial_time + int(micros / 1000000)
+        record_time_str = datetime.fromtimestamp(record_time).strftime('%Y-%m-%d %H:%M:%S')
+        record_millis_str = str(int(micros / 1000) % 1000).zfill(3)
+        record_micros_str = str(micros % 1000).zfill(3)
+        message = f'[{record_time_str}:{record_millis_str}:{record_micros_str}] {transcript}'
+        print(message, file=sys.stderr)
+
+        requests.post(CLIENT_URL + '/message', json={'message': message, 'results': last_results})
+        return jsonify({'message': transcript})
+    except Exception as e:
+        return jsonify({'error': str(e)}), 500
+
+@app.route('/aid/save', methods=['POST'])
+def save_audio():
+    """Store a posted audio chunk as a temp WAV file and cache it per channel.
+
+    Failures are logged to stderr and reported as a JSON ``status`` of ``error``.
+    """
+    try:
+        audio = request.json['audio']
+        encoded = audio['content']
+        micros, initial_time = audio['micros'], audio['initialTime']
+
+        # File-name timestamp: date and time, then zero-padded millisecond and microsecond parts.
+        stamp = datetime.fromtimestamp(initial_time + int(micros / 1000000))
+        millis_part = str(int(micros / 1000) % 1000).zfill(3)
+        micros_part = str(micros % 1000).zfill(3)
+        recording_time = f'{stamp.strftime("%d_%m_%Y__%H_%M_%S")}_{millis_part}_{micros_part}'
+
+        channel = audio['channel']
+        save_path = compose_save_path(recording_time, channel)
+        pcm = base64.b64decode(encoded)
+        wav_composer.compose(save_path, pcm)
+        # Drop the first 100 ms before handing the chunk to the channel comparison.
+        channel_connector.add_audio(wav_composer.remove_initial_segment(pcm, 100), channel)
+        return jsonify({'status': 'success', 'message': 'Audio saved successfully'})
+    except Exception as e:
+        print(traceback.format_exc(), file=sys.stderr)
+        return jsonify({'status': 'error', 'message': str(e)})
+
+def compose_save_path(recording_time, channel):
+    """Build the next temp WAV path, log it, and advance the global file counter."""
+    global file_number
+
+    save_path = f'{temp_directory}saved_audio{file_number}__channel{channel}__{recording_time}.wav'
+    file_number += 1
+    print(f'The audio file will be saved to {save_path}', file=sys.stderr)
+    return save_path
+
+@app.route("/recorder/client/start")
+def start_recording():
+    """Switch the shared recorder into recording mode."""
+
+    # No JSON body is read: this GET-only route required a ``content`` field it
+    # never used, so ordinary body-less requests failed before recording began.
+    try:
+        wav_recorder.start_recording()
+        return jsonify({'status': 'success', 'message': 'Recording started'})
+    except Exception as e:
+        return jsonify({'status': 'error', 'message': str(e)})
+
+@app.route("/recorder/cheeze/check")
+def check_recording():  # Report, as JSON, whether the shared recorder is currently recording.
+    try:
+        return jsonify({'status': 'success', 'is_recording': wav_recorder.is_recording()})
+    except Exception as e:
+        return jsonify({'status': 'error', 'message': str(e)})
+
+@app.route("/recorder/cheeze/send_header", methods=['POST'])
+def receive_header():
+    """Hand the ``header`` field of the posted JSON body to the shared recorder.
+
+    Any failure is reported as a JSON ``status`` of ``error`` instead of being raised.
+    """
+    try:
+        wav_recorder.set_header(request.json['header'])
+        return jsonify({'status': 'success', 'message': 'Header received'})
+    except Exception as e:
+        return jsonify({'status': 'error', 'message': str(e)})
+
+@app.route("/recorder/cheeze/send_part", methods=['POST'])
+def receive_part():
+    """Append the ``content`` field of the posted JSON body to the current recording.
+
+    Any failure is reported as a JSON ``status`` of ``error`` instead of being raised.
+    """
+    try:
+        wav_recorder.append_data(request.json['content'])
+        return jsonify({'status': 'success', 'message': 'Content part received'})
+    except Exception as e:
+        return jsonify({'status': 'error', 'message': str(e)})
+
+@app.route("/recorder/client/stop")
+def stop_recording():
+    """Finish the current recording and return it as base64-encoded WAV data.
+
+    A raw ``BytesIO`` is not JSON-serialisable, and this GET route has no body to read.
+    """
+    try:
+        buf = io.BytesIO()
+        wav_recorder.save_recording(buf=buf)
+        encoded = base64.b64encode(buf.getvalue()).decode('ascii')
+        return jsonify({'status': 'success', 'content': encoded})
+    except Exception as e:
+        return jsonify({'status': 'error', 'message': str(e)})
+
diff --git a/src/channel_connector.py b/src/channel_connector.py
new file mode 100644
index 0000000..d9d51e6
--- /dev/null
+++ b/src/channel_connector.py
@@ -0,0 +1,72 @@
+import audioop
+import sys
+from datetime import datetime
+import threading
+
+from sound_localizer import SoundLocalizer
+
+class ChannelConnector:
+    """Pair up the latest audio chunk of channels 0 and 1 and compare them.
+
+    The first chunk added after a reset starts a one-second timer; when it fires,
+    the cached pair (if both channels arrived) is compared and the cache is cleared.
+    """
+
+    def __init__(self, sample_rate, sample_width=2):
+        self.cached_audios = {}  # channel -> most recent audio bytes
+        self.previous_time = None  # set while a comparison timer is pending
+        self.lock = threading.Lock()
+        self.timer = None
+        self.sample_width = sample_width
+        self.sound_localizer = SoundLocalizer(sample_rate)
+
+    def add_audio(self, audio_bytes, channel):
+        """Cache ``audio_bytes`` for ``channel`` and arm the timer if none is pending."""
+        with self.lock:
+            self.cached_audios[channel] = audio_bytes
+
+            if self.previous_time is None:
+                self.previous_time = datetime.now()
+                # Schedule the reset and comparison after 1 second
+                self.timer = threading.Timer(1.0, self.reset_and_compare)
+                self.timer.start()
+
+    def reset_and_compare(self):
+        """Timer callback: clear the cache, then compare the pair that was cached.
+
+        The state is reset before comparing so that an exception raised by
+        ``compare`` cannot leave ``previous_time`` set, which would stop any
+        further timer from being scheduled. Comparing outside the lock also keeps
+        ``add_audio`` from blocking while the comparison runs.
+        """
+        with self.lock:
+            left = self.cached_audios.get(0)
+            right = self.cached_audios.get(1)
+            self.reset()
+
+        if left is not None and right is not None:
+            self.compare(left, right)
+
+    def reset(self):
+        """Forget cached audio and cancel the pending timer; call with the lock held."""
+        self.previous_time = None
+        self.cached_audios = {}
+        if self.timer:
+            self.timer.cancel()
+            self.timer = None
+
+    def compare(self, audio_left, audio_right):
+        """Log both channels' RMS, the louder side and an estimated source angle.
+
+        Nothing is logged when both RMS values are 30 or below.
+        """
+        left_rms = audioop.rms(audio_left, self.sample_width)
+        right_rms = audioop.rms(audio_right, self.sample_width)
+
+        if left_rms > 30 or right_rms > 30:
+            print(f"Left channel RMS: {left_rms}", file=sys.stderr)
+            print(f"Right channel RMS: {right_rms}", file=sys.stderr)
+            print('left' if left_rms > right_rms else 'right', file=sys.stderr)
+
+            angle = self.sound_localizer.estimate_source_angle(audio_left, audio_right)
+            print(f'Approximate angle: {angle}', file=sys.stderr)
diff --git a/src/sound_localizer.py b/src/sound_localizer.py
new file mode 100644
index 0000000..6d1e2f0
--- /dev/null
+++ b/src/sound_localizer.py
@@ -0,0 +1,67 @@
+import numpy as np
+from scipy.signal import correlate
+import math
+import sys
+
+class SoundLocalizer:
+    """Estimate a sound source angle from two buffers of interleaved 16-bit audio."""
+
+    def __init__(self, sample_rate, channels=2):
+        self.sample_rate = sample_rate
+        self.channels = channels
+
+    def _compute_tdoa(self, sig1, sig2, framerate):
+        """Return the lag, in seconds, at which ``sig1`` correlates best with ``sig2``."""
+        # Correlate in float64: integer inputs give an integer result, so the
+        # products of int16 samples would wrap around and move the peak.
+        sig1 = np.asarray(sig1, dtype=np.float64)
+        sig2 = np.asarray(sig2, dtype=np.float64)
+        correlation = correlate(sig1, sig2, mode='full')
+        # In 'full' mode zero lag sits at index len(sig2) - 1; using len(sig1)
+        # was only right when both signals had the same length.
+        lag = np.argmax(correlation) - (len(sig2) - 1)
+        return lag / framerate
+
+    def _calculate_angle(self, tdoa, speed_of_sound, distance_between_mics, mic_angles):
+        """Turn a TDOA into degrees and average it over the ``mic_angles`` offsets."""
+        # Path difference relative to the mic spacing, clipped to asin's domain.
+        normalized_tdoa = np.clip(tdoa * speed_of_sound / distance_between_mics, -1, 1)
+        tdoa_angle = math.degrees(math.asin(normalized_tdoa))
+        return np.mean([angle + tdoa_angle for angle in mic_angles])
+
+    def _deinterleave(self, audio_bytes):
+        """Return ``audio_bytes`` as an int16 array of shape (frames, channels)."""
+        # Drop a trailing partial frame; frombuffer/reshape raise ValueError on it.
+        frame_size = np.dtype(np.int16).itemsize * self.channels
+        usable = len(audio_bytes) - len(audio_bytes) % frame_size
+        samples = np.frombuffer(audio_bytes[:usable], dtype=np.int16)
+        return samples.reshape(-1, self.channels)
+
+    def estimate_source_angle(self, audio_left, audio_right,
+                              speed_of_sound=343.0, distance_between_mics=0.1):
+        """Estimate the source angle in degrees from two interleaved int16 buffers.
+
+        The first two channels of each buffer are treated as microphones 1-2
+        (``audio_left``) and 3-4 (``audio_right``); microphone 1 is the reference
+        for all three time-difference measurements, whose angles are averaged.
+        """
+        left = self._deinterleave(audio_left)
+        right = self._deinterleave(audio_right)
+
+        mic1 = left[:, 0]
+        mic2 = left[:, 1]
+        mic3 = right[:, 0]
+        mic4 = right[:, 1]
+
+        tdoa_12 = self._compute_tdoa(mic1, mic2, self.sample_rate)
+        tdoa_13 = self._compute_tdoa(mic1, mic3, self.sample_rate)
+        tdoa_14 = self._compute_tdoa(mic1, mic4, self.sample_rate)
+        print(tdoa_12, tdoa_13, tdoa_14, file=sys.stderr)
+
+        mic_angles = [-33, -11, 11, 33]
+
+        angle_12 = self._calculate_angle(tdoa_12, speed_of_sound, distance_between_mics, mic_angles[:2])
+        angle_13 = self._calculate_angle(tdoa_13, speed_of_sound, distance_between_mics, mic_angles[:3])
+        angle_14 = self._calculate_angle(tdoa_14, speed_of_sound, distance_between_mics, mic_angles)
+
+        return np.mean([angle_12, angle_13, angle_14])
diff --git a/src/wav_composer.py b/src/wav_composer.py
new file mode 100644
index 0000000..c6e243d
--- /dev/null
+++ b/src/wav_composer.py
@@ -0,0 +1,35 @@
+import wave
+import audioop
+
+class WavComposer:
+    """Write raw PCM audio to WAV files or file objects with one fixed format."""
+
+    def __init__(self, sample_rate, channels=2, sample_width=2):
+        self.sample_rate = sample_rate
+        self.channels = channels
+        self.sample_width = sample_width
+
+    def compose(self, save_path, audio_bytes, channel=None, gain=40):
+        """Amplify ``audio_bytes`` by ``gain`` and write them to ``save_path`` as WAV.
+
+        ``save_path`` is a path or a writable binary file object, as taken by
+        ``wave.open``. ``channel`` is accepted for callers that pass it as a
+        keyword; it does not affect the output.
+        """
+        with wave.open(save_path, 'wb') as wav_file:
+            wav_file.setnchannels(self.channels)
+            wav_file.setsampwidth(self.sample_width)
+            wav_file.setframerate(self.sample_rate)
+
+            # Amplify using the configured sample width instead of a hard-coded 2,
+            # so composers created with another width are scaled correctly.
+            amplified = audioop.mul(audio_bytes, self.sample_width, gain)
+
+            # writeframes() also keeps the header's frame count up to date.
+            wav_file.writeframes(amplified)
+
+    def remove_initial_segment(self, audio_bytes, ms):
+        """Return ``audio_bytes`` without its first ``ms`` milliseconds."""
+        bytes_per_frame = self.sample_width * self.channels
+        frames_to_remove = max(0, int((ms / 1000.0) * self.sample_rate))
+        return audio_bytes[frames_to_remove * bytes_per_frame:]
\ No newline at end of file
diff --git a/src/wav_recorder.py b/src/wav_recorder.py
new file mode 100644
index 0000000..f647cf4
--- /dev/null
+++ b/src/wav_recorder.py
@@ -0,0 +1,62 @@
+from wav_composer import WavComposer
+import datetime
+import os
+
+class WavRecorder:
+    """Collect a header and data chunks, then write them out as one WAV recording."""
+
+    def __init__(self, output_directory, sample_rate=16000):
+        self._header = b''
+        self._data_chunks = []
+        self._output_directory = output_directory
+        self._is_recording = False
+        # compose() is an instance method, so the recorder needs its own composer.
+        self._composer = WavComposer(sample_rate)
+
+    def start_recording(self):
+        """Mark the recorder as recording so that data chunks are accepted."""
+        self._is_recording = True
+
+    def set_header(self, header):
+        """Store the bytes that are written ahead of the recorded chunks."""
+        self._header = header
+
+    def append_data(self, data):
+        """Queue one chunk of audio data.
+
+        Raises:
+            RuntimeError: if the recorder is not currently recording.
+        """
+        if not self._is_recording:
+            raise RuntimeError("Cannot append data. Recorder is not recording.")
+        self._data_chunks.append(data)
+
+    def save_recording(self, buf=None):
+        """Write the recording to ``buf`` (or a new file) and reset the recorder."""
+        self._write_to_file(buf)
+        self._reset_recorder()
+
+    def _write_to_file(self, buf=None):
+        """Compose header + chunks into ``buf``, or into a timestamped file if None."""
+        # NOTE(review): header and chunks must be bytes-like for this join;
+        # confirm that callers do not pass str.
+        wav_data = self._header + b''.join(self._data_chunks)
+        if buf is not None:
+            output_path = buf
+        else:
+            # ``filename`` used to be an undefined name; derive one from the clock.
+            filename = datetime.datetime.now().strftime('recording_%d_%m_%Y__%H_%M_%S.wav')
+            output_path = os.path.join(self._output_directory, filename)
+        self._composer.compose(output_path, wav_data)
+
+        print(f'Saved recording to {output_path}')
+
+    def _reset_recorder(self):
+        """Drop the header and chunks and leave recording mode."""
+        self._header = b''
+        self._data_chunks.clear()
+        self._is_recording = False
+
+    def is_recording(self):
+        """Return True while the recorder accepts data chunks."""
+        return self._is_recording