Initial commit

2024-06-21 20:28:21 +03:00 · 2024-06-21 20:28:21 +03:00 · daa2b218b2
commit daa2b218b2
10 changed files with 723 additions and 0 deletions
--- a/.dockerignore
+++ b/.dockerignore
@ -0,0 +1,142 @@
 .git
 Dockerfile
 .DS_Store
 .gitignore
 .dockerignore
 /credentials
 /cache
 /store
 /node_modules
 # https://github.com/github/gitignore/blob/master/Global/macOS.gitignore
 # General
 *.DS_Store
 .AppleDouble
 .LSOverride
 # Icon must end with two \r
 Icon
 # Thumbnails
 ._*
 # Files that might appear in the root of a volume
 .DocumentRevisions-V100
 .fseventsd
 .Spotlight-V100
 .TemporaryItems
 .Trashes
 .VolumeIcon.icns
 .com.apple.timemachine.donotpresent
 # Directories potentially created on remote AFP share
 .AppleDB
 .AppleDesktop
 Network Trash Folder
 Temporary Items
 .apdisk
 # https://raw.githubusercontent.com/github/gitignore/master/Python.gitignore
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
 *$py.class
 # C extensions
 *.so
 # Distribution / packaging
 .Python
 build/
 develop-eggs/
 dist/
 downloads/
 eggs/
 .eggs/
 lib64/
 parts/
 sdist/
 var/
 wheels/
 *.egg-info/
 .installed.cfg
 *.egg
 # PyInstaller
 #  Usually these files are written by a python script from a template
 #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 *.manifest
 *.spec
 # Installer logs
 pip-log.txt
 pip-delete-this-directory.txt
 # Unit test / coverage reports
 htmlcov/
 .tox/
 .coverage
 .coverage.*
 .cache
 nosetests.xml
 coverage.xml
 *.cover
 .hypothesis/
 # Translations
 *.mo
 *.pot
 # Django stuff:
 *.log
 local_settings.py
 # Flask stuff:
 instance/
 .webassets-cache
 # Scrapy stuff:
 .scrapy
 # Sphinx documentation
 docs/_build/
 # PyBuilder
 target/
 # Jupyter Notebook
 .ipynb_checkpoints
 # pyenv
 .python-version
 # celery beat schedule file
 celerybeat-schedule
 # SageMath parsed files
 *.sage.py
 # Environments
 .env
 .venv
 env/
 venv/
 ENV/
 # Spyder project settings
 .spyderproject
 .spyproject
 # Rope project settings
 .ropeproject
 # mkdocs documentation
 /site
 # mypy
 .mypy_cache/
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,163 @@
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
 *$py.class
 # C extensions
 *.so
 # Distribution / packaging
 .Python
 build/
 develop-eggs/
 dist/
 downloads/
 eggs/
 .eggs/
 lib/
 lib64/
 parts/
 sdist/
 var/
 wheels/
 share/python-wheels/
 *.egg-info/
 .installed.cfg
 *.egg
 MANIFEST
 # PyInstaller
 #  Usually these files are written by a python script from a template
 #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 *.manifest
 *.spec
 # Installer logs
 pip-log.txt
 pip-delete-this-directory.txt
 # Unit test / coverage reports
 htmlcov/
 .tox/
 .nox/
 .coverage
 .coverage.*
 .cache
 nosetests.xml
 coverage.xml
 *.cover
 *.py,cover
 .hypothesis/
 .pytest_cache/
 cover/
 # Translations
 *.mo
 *.pot
 # Django stuff:
 *.log
 local_settings.py
 db.sqlite3
 db.sqlite3-journal
 # Flask stuff:
 instance/
 .webassets-cache
 # Scrapy stuff:
 .scrapy
 # Sphinx documentation
 docs/_build/
 # PyBuilder
 .pybuilder/
 target/
 # Jupyter Notebook
 .ipynb_checkpoints
 # IPython
 profile_default/
 ipython_config.py
 # pyenv
 #   For a library or package, you might want to ignore these files since the code is
 #   intended to run in multiple environments; otherwise, check them in:
 # .python-version
 # pipenv
 #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 #   install all needed dependencies.
 #Pipfile.lock
 # poetry
 #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 #   This is especially recommended for binary packages to ensure reproducibility, and is more
 #   commonly ignored for libraries.
 #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
 #poetry.lock
 # pdm
 #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
 #pdm.lock
 #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
 #   in version control.
 #   https://pdm.fming.dev/#use-with-ide
 .pdm.toml
 # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
 __pypackages__/
 # Celery stuff
 celerybeat-schedule
 celerybeat.pid
 # SageMath parsed files
 *.sage.py
 # Environments
 .env
 .venv
 env/
 venv/
 ENV/
 env.bak/
 venv.bak/
 # Spyder project settings
 .spyderproject
 .spyproject
 # Rope project settings
 .ropeproject
 # mkdocs documentation
 /site
 # mypy
 .mypy_cache/
 .dmypy.json
 dmypy.json
 # Pyre type checker
 .pyre/
 # pytype static type analyzer
 .pytype/
 # Cython debug symbols
 cython_debug/
 # PyCharm
 #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
 #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
 temp/
 recordings/
--- a/12
+++ b/12
@ -0,0 +1,12 @@
 # Base image: the "slim" python:3.10 variant.
 FROM python:3.10-slim
 WORKDIR /app
 # requirements.txt is copied and installed before the rest of the sources;
 # presumably so the dependency layer is reused when only code changes.
 COPY requirements.txt /app
 RUN pip3 install -r requirements.txt
 # Copy the remaining build context into /app (the current WORKDIR).
 COPY . .
 # Module that `flask run` loads.
 ENV FLASK_APP=src/app.py
 # Start Flask through its CLI, listening on every interface at port 8081.
 CMD [ "python3", "-m" , "flask", "run", "--host=0.0.0.0", "--port=8081"]
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@ -0,0 +1,14 @@
 # Compose file format version.
 version: '2.1'
 services:
  # Single service, built from the Dockerfile in this directory.
  rat:
    container_name: rat
    image: rat
    build: .
    # Shares the host's network stack, so "localhost" below is the host itself.
    network_mode: host
    # Bind mounts: the source tree plus the temp/ and recordings/ directories
    # that src/app.py uses.
    volumes:
      - "./src:/app/src"
      - "./temp:/app/temp"
      - "./recordings:/app/recordings"
    # Read by src/app.py through os.getenv().
    environment:
      CLIENT_URL: "http://192.168.0.119:5000"
      HARPYIA_URL: "http://localhost:8080"
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,15 @@
 blinker==1.7.0
 certifi==2024.2.2
 charset-normalizer==3.3.2
 click==8.1.7
 Flask==3.0.2
 idna==3.6
 itsdangerous==2.1.2
 Jinja2==3.1.3
 MarkupSafe==2.1.5
 numpy==2.0.0
 python-dotenv==1.0.1
 requests==2.31.0
 scipy==1.13.1
 urllib3==2.2.1
 Werkzeug==3.0.1
--- a/src/app.py
+++ b/src/app.py
@ -0,0 +1,201 @@
 from flask import Flask, abort, request, jsonify
 from tempfile import NamedTemporaryFile
 from datetime import datetime
 from dotenv import load_dotenv
 import os
 import sys
 import requests
 import base64
 import io
 import time
 import glob
 import traceback
 from datetime import datetime
 from wav_recorder import WavRecorder
 from wav_composer import WavComposer
 from channel_connector import ChannelConnector
 # Load variables from a .env file into the process environment before reading them.
 load_dotenv()
 # Base URLs of the peer services; both fall back to '' when unset.
 CLIENT_URL = os.getenv('CLIENT_URL') or ''
 HARPYIA_URL = os.getenv('HARPYIA_URL') or ''
 app = Flask(__name__)
 # Running counter that compose_save_path() embeds in file names and increments.
 file_number = 1
 # Relative working directories; compose_save_path() concatenates directly onto
 # temp_directory, so its trailing slash is required.
 temp_directory = 'temp/'
 recordings_directory = 'recordings/'
 def prepare_dirs():
    """Create the working directories and delete leftover files from the temp one.

    Both ``temp_directory`` and ``recordings_directory`` are created when
    missing.  Only the temp directory is emptied; recordings are kept.
    """
    os.makedirs(os.path.dirname(temp_directory), exist_ok=True)
    os.makedirs(os.path.dirname(recordings_directory), exist_ok=True)
    for leftover in glob.glob(os.path.join(temp_directory, '*')):
        # os.remove() raises on directories, which would abort start-up;
        # sub-directories are therefore left in place.
        if os.path.isfile(leftover) or os.path.islink(leftover):
            os.remove(leftover)
 # Runs at import time, before any request is served.
 prepare_dirs()
 # Shared, module-level helpers used by the route handlers below.
 wav_recorder = WavRecorder(recordings_directory)
 # 16000 is passed as the sample rate to both helpers.
 wav_composer = WavComposer(16000)
 channel_connector = ChannelConnector(16000)
@app.route("/")
def hello():
    """Return a plain-text usage hint for the service root."""
    # The hint names '/aid/transcript' because that is the route in this
    # module that forwards audio to the recognizer.
    return "To recognize an audio file, upload it using a POST request with '/aid/transcript' route."
@app.route("/time")
def get_time():
    """Report the server clock as whole seconds since the Unix epoch."""
    now_seconds = int(time.time())
    payload = {"current_timestamp": now_seconds}
    return payload
@app.route("/aid/ready", methods=['POST'])
def count():
    """Acknowledge a readiness notification with an empty JSON object."""
    # Forwarding the state to the client is currently switched off:
    # requests.post(CLIENT_URL + '/state', json={'state': 1})
    empty_body = jsonify({})
    return empty_body, 200
@app.route('/aid/transcript', methods=['POST'])
def handler():
    """Transcribe one audio chunk through Harpyia and relay the text to the client.

    Expects a JSON body whose ``audio`` object carries ``content`` (base64
    encoded audio), ``initialTime`` (epoch seconds) and ``micros`` (offset
    from ``initialTime`` in microseconds).

    Returns:
        ``{'message': transcript}`` on success; ``{'error': ...}`` with
        status 400 for a malformed payload and 500 for any later failure.
    """
    try:
        data = request.json
        audio = data['audio']
        audio_content = audio['content']
        initial_time = audio['initialTime']
        micros = audio['micros']
        # Blank the payload so the request log below stays readable.
        audio['content'] = ''
        print(data, file=sys.stderr)
        audio_bytes = base64.b64decode(audio_content)
    except (KeyError, TypeError, ValueError) as e:
        return jsonify({'error': f'Malformed request: {e!r}'}), 400

    try:
        # Wrap the raw samples in an in-memory WAV container.  The channel is
        # not forwarded: WavComposer.compose() only takes (target, audio_bytes).
        buf = io.BytesIO()
        wav_composer.compose(buf, audio_bytes)
        buf.seek(0)

        requests.post(CLIENT_URL + '/state', json={'state': 0})
        response = requests.post(HARPYIA_URL + '/recognize', files={'file': buf})
        if response.status_code != 200:
            return jsonify({'error': 'Error occurred on Harpyia'}), 500

        results = response.json().get('results', [])
        # Every fragment is prefixed with a space, so the text keeps its
        # historical leading blank.
        transcript = ''.join(f' {entity.get("transcript")}' for entity in results)

        record_time = initial_time + int(micros / 1000000)
        record_time_str = datetime.fromtimestamp(record_time).strftime('%Y-%m-%d %H:%M:%S')
        record_millis_str = str(int(micros / 1000) % 1000).zfill(3)
        record_micros_str = str(micros % 1000).zfill(3)
        message = f'[{record_time_str}:{record_millis_str}:{record_micros_str}] {transcript}'
        print(message, file=sys.stderr)

        # The nested 'results' of the last entity are forwarded; an empty
        # result list yields None instead of an unbound-variable error.
        # NOTE(review): confirm whether the whole `results` list was intended.
        last_entity_results = results[-1].get('results') if results else None
        requests.post(CLIENT_URL + '/message', json={'message': message, 'results': last_entity_results})
        return jsonify({'message': transcript})
    except Exception as e:
        print(traceback.format_exc(), file=sys.stderr)
        return jsonify({'error': str(e)}), 500
@app.route('/aid/save', methods=['POST'])
def save_audio():
    """Write an uploaded chunk to a WAV file in temp/ and queue it for channel comparison.

    Reads ``content``, ``micros``, ``initialTime`` and ``channel`` from the
    ``audio`` object of the JSON body.  Any failure is logged with its
    traceback and reported as ``{'status': 'error', ...}``.
    """
    try:
        audio = request.json['audio']
        encoded = audio['content']
        micros = audio['micros']
        initial_time = audio['initialTime']

        # Split the microsecond offset into seconds / millis / micros parts.
        record_time = initial_time + int(micros / 1000000)
        millis_part = str(int(micros / 1000) % 1000).zfill(3)
        micros_part = str(micros % 1000).zfill(3)
        stamp = datetime.fromtimestamp(record_time).strftime("%d_%m_%Y__%H_%M_%S")
        stamp = f'{stamp}_{millis_part}_{micros_part}'

        channel = audio['channel']
        target_path = compose_save_path(stamp, channel)

        raw_audio = base64.b64decode(encoded)
        wav_composer.compose(target_path, raw_audio)

        # The first 100 ms are dropped before the chunk is handed to the connector.
        trimmed_audio = wav_composer.remove_initial_segment(raw_audio, 100)
        channel_connector.add_audio(trimmed_audio, channel)

        return jsonify({'status': 'success', 'message': 'Audio saved successfully'})
    except Exception as e:
        print(traceback.format_exc(), file=sys.stderr)
        return jsonify({'status': 'error', 'message': str(e)})
 def compose_save_path(recording_time, channel):
    """Build the next temp-file path and advance the global file counter.

    The name combines the running ``file_number``, the channel and the
    formatted recording time; the chosen path is logged to stderr.
    """
    global file_number
    file_name = f'saved_audio{file_number}__channel{channel}__{recording_time}.wav'
    save_path = temp_directory + file_name
    print(f'The audio file will be saved to {save_path}', file=sys.stderr)
    file_number = file_number + 1
    return save_path
@app.route("/recorder/client/start")
def start_recording():
    """Switch the shared recorder into recording mode.

    The route is registered without ``methods``, i.e. for GET, so no request
    body is read: starting a recording needs no payload.

    Returns:
        ``{'status': 'success', ...}`` or ``{'status': 'error', 'message': ...}``.
    """
    try:
        wav_recorder.start_recording()
        return jsonify({'status': 'success', 'message': 'Recording started'})
    except Exception as e:
        return jsonify({'status': 'error', 'message': str(e)})
@app.route("/recorder/cheeze/check")
def check_recording():
    """Tell the caller whether the shared recorder is currently recording."""
    try:
        recording_now = wav_recorder.is_recording()
        reply = {'status': 'success', 'is_recording': recording_now}
        return jsonify(reply)
    except Exception as e:
        failure = {'status': 'error', 'message': str(e)}
        return jsonify(failure)
@app.route("/recorder/cheeze/send_header", methods=['POST'])
def receive_header():
    """Store the ``header`` field of the JSON body on the shared recorder."""
    try:
        header = request.json['header']
        wav_recorder.set_header(header)
        outcome = {'status': 'success', 'message': 'Header received'}
    except Exception as e:
        outcome = {'status': 'error', 'message': str(e)}
    return jsonify(outcome)
@app.route("/recorder/cheeze/send_part", methods=['POST'])
def receive_part():
    """Append the ``content`` field of the JSON body to the running recording."""
    try:
        part = request.json['content']
        wav_recorder.append_data(part)
        outcome = {'status': 'success', 'message': 'Content part received'}
    except Exception as e:
        outcome = {'status': 'error', 'message': str(e)}
    return jsonify(outcome)
@app.route("/recorder/client/stop")
def stop_recording():
    """Finish the running recording and return it to the caller.

    The route is registered without ``methods``, i.e. for GET, so no request
    body is read.  The recorder writes into an in-memory buffer whose bytes
    are returned base64-encoded, because raw bytes (or the buffer object
    itself) cannot be serialised to JSON.

    Returns:
        ``{'status': 'success', 'content': <base64 str>}`` or
        ``{'status': 'error', 'message': ...}``.
    """
    try:
        buf = io.BytesIO()
        wav_recorder.save_recording(buf=buf)
        encoded = base64.b64encode(buf.getvalue()).decode('ascii')
        return jsonify({'status': 'success', 'content': encoded})
    except Exception as e:
        return jsonify({'status': 'error', 'message': str(e)})
--- a/src/channel_connector.py
+++ b/src/channel_connector.py
@ -0,0 +1,50 @@
 import audioop
 import sys
 from datetime import datetime
 import threading
 from sound_localizer import SoundLocalizer
 class ChannelConnector:
    """Collect the latest audio per channel and compare channels 0 and 1 once per window.

    The first ``add_audio`` call after a reset starts a timer; when it fires,
    the cached buffers are compared (if both channels delivered data) and the
    cache is cleared for the next window.
    """
    def __init__(self, sample_rate, sample_width=2, window_seconds=1.0, rms_threshold=30):
        """
        Args:
            sample_rate: Sample rate handed to the ``SoundLocalizer``.
            sample_width: Bytes per sample, used for the RMS computation.
            window_seconds: Delay between the first chunk of a window and the comparison.
            rms_threshold: At least one channel's RMS must exceed this for a comparison to be reported.
        """
        self.cached_audios = {}
        self.previous_time = None
        self.lock = threading.Lock()
        self.timer = None
        self.sample_width = sample_width
        self.window_seconds = window_seconds
        self.rms_threshold = rms_threshold
        self.sound_localizer = SoundLocalizer(sample_rate)
    def add_audio(self, audio_bytes, channel):
        """Cache *audio_bytes* for *channel*, replacing any earlier chunk of the window."""
        with self.lock:
            self.cached_audios[channel] = audio_bytes
            if self.previous_time is None:
                self.previous_time = datetime.now()
                # First chunk of a window: schedule the comparison.
                self.timer = threading.Timer(self.window_seconds, self.reset_and_compare)
                self.timer.start()
    def reset_and_compare(self):
        """Close the current window and compare its two channels when both are present."""
        with self.lock:
            audio_left = self.cached_audios.get(0)
            audio_right = self.cached_audios.get(1)
            # Reset before comparing: a failing comparison must not leave
            # previous_time set, or no later window would ever be scheduled.
            self.reset()
        # The comparison runs outside the lock so add_audio() is not blocked by it.
        if audio_left is not None and audio_right is not None:
            self.compare(audio_left, audio_right)
    def reset(self):
        """Clear the cache and cancel the pending timer. Callers hold ``self.lock``."""
        self.previous_time = None
        self.cached_audios = {}
        if self.timer:
            self.timer.cancel()
            self.timer = None
    def compare(self, audio_left, audio_right):
        """Log both RMS levels, the louder side and the estimated angle to stderr.

        Nothing is logged when neither channel exceeds ``rms_threshold``.
        """
        left_rms = audioop.rms(audio_left, self.sample_width)
        right_rms = audioop.rms(audio_right, self.sample_width)
        if left_rms > self.rms_threshold or right_rms > self.rms_threshold:
            print(f"Left channel RMS: {left_rms}", file=sys.stderr)
            print(f"Right channel RMS: {right_rms}", file=sys.stderr)
            print('left' if left_rms > right_rms else 'right', file=sys.stderr)
            angle = self.sound_localizer.estimate_source_angle(audio_left, audio_right)
            print(f'Approximate angle: {angle}', file=sys.stderr)
--- a/src/sound_localizer.py
+++ b/src/sound_localizer.py
@ -0,0 +1,52 @@
 import numpy as np
 from scipy.signal import correlate
 import math
 import sys
 class SoundLocalizer:
    """Estimate a source angle from two interleaved 16-bit PCM buffers.

    Each buffer is split into ``channels`` columns; the first two columns of
    the left and of the right buffer are treated as four microphones.
    """
    def __init__(self, sample_rate, channels=2):
        """
        Args:
            sample_rate: Frames per second, used to turn sample lags into seconds.
            channels: Interleaved channels per buffer (columns 0 and 1 are read).
        """
        self.sample_rate = sample_rate
        self.channels = channels
    def _compute_tdoa(self, sig1, sig2, framerate):
        """Return the delay of *sig1* relative to *sig2* in seconds.

        A positive value means *sig1* lags behind *sig2*.
        """
        # Correlating int16 arrays keeps int16 as result type and overflows;
        # do the arithmetic in float64 instead.
        sig1 = np.asarray(sig1, dtype=np.float64)
        sig2 = np.asarray(sig2, dtype=np.float64)
        correlation = correlate(sig1, sig2, mode='full')
        # In 'full' mode index 0 corresponds to lag -(len(sig2) - 1), so the
        # zero-lag index depends on the second signal's length.
        lag = np.argmax(correlation) - (len(sig2) - 1)
        return lag / framerate
    def _calculate_angle(self, tdoa, speed_of_sound, distance_between_mics, mic_angles):
        """Return the mean of *mic_angles*, each shifted by the angle derived from *tdoa* (degrees)."""
        normalized_tdoa = tdoa * speed_of_sound / distance_between_mics
        # asin() is only defined on [-1, 1].
        normalized_tdoa = np.clip(normalized_tdoa, -1, 1)
        tdoa_angle = math.asin(normalized_tdoa) * 180 / math.pi
        return np.mean([angle + tdoa_angle for angle in mic_angles])
    def estimate_source_angle(self, audio_left, audio_right,
                              speed_of_sound=343.0, distance_between_mics=0.1):
        """Return the averaged angle estimate in degrees.

        Args:
            audio_left: Interleaved int16 PCM bytes providing microphones 1 and 2.
            audio_right: Interleaved int16 PCM bytes providing microphones 3 and 4.
            speed_of_sound: Propagation speed used to convert delay to distance.
            distance_between_mics: Microphone spacing used to normalise the delay.

        The three delays relative to microphone 1 are also printed to stderr.
        """
        audio_left = np.frombuffer(audio_left, dtype=np.int16).reshape(-1, self.channels)
        audio_right = np.frombuffer(audio_right, dtype=np.int16).reshape(-1, self.channels)
        mic1 = audio_left[:, 0]
        mic2 = audio_left[:, 1]
        mic3 = audio_right[:, 0]
        mic4 = audio_right[:, 1]
        tdoa_12 = self._compute_tdoa(mic1, mic2, self.sample_rate)
        tdoa_13 = self._compute_tdoa(mic1, mic3, self.sample_rate)
        tdoa_14 = self._compute_tdoa(mic1, mic4, self.sample_rate)
        print(tdoa_12, tdoa_13, tdoa_14, file=sys.stderr)
        mic_angles = [-33, -11, 11, 33]
        angle_12 = self._calculate_angle(tdoa_12, speed_of_sound, distance_between_mics, mic_angles[:2])
        angle_13 = self._calculate_angle(tdoa_13, speed_of_sound, distance_between_mics, mic_angles[:3])
        angle_14 = self._calculate_angle(tdoa_14, speed_of_sound, distance_between_mics, mic_angles)
        return np.mean([angle_12, angle_13, angle_14])
--- a/src/wav_composer.py
+++ b/src/wav_composer.py
@ -0,0 +1,34 @@
 import wave
 import audioop
 class WavComposer:
    """Write raw PCM bytes into a WAV container after applying a fixed gain."""
    def __init__(self, sample_rate, channels=2, sample_width=2, gain=40):
        """
        Args:
            sample_rate: Frames per second written to the WAV header.
            channels: Number of interleaved channels.
            sample_width: Bytes per sample.
            gain: Factor every sample is multiplied by before writing.
        """
        self.sample_rate = sample_rate
        self.channels = channels
        self.sample_width = sample_width
        self.gain = gain
    def compose(self, save_path, audio_bytes):
        """Amplify *audio_bytes* and write them as a WAV file.

        Args:
            save_path: File name or writable binary file object.
            audio_bytes: Raw PCM data matching the configured sample width.
        """
        # Amplify first so that a malformed buffer fails before the target is
        # opened (and truncated).  The width must match the header written below.
        amplified = audioop.mul(audio_bytes, self.sample_width, self.gain)
        with wave.open(save_path, 'wb') as wav_file:
            wav_file.setnchannels(self.channels)
            wav_file.setsampwidth(self.sample_width)
            wav_file.setframerate(self.sample_rate)
            wav_file.writeframesraw(amplified)
    def remove_initial_segment(self, audio_bytes, ms):
        """Return *audio_bytes* without its first *ms* milliseconds.

        Negative durations are treated as zero so that the slice never
        starts counting from the end of the buffer.
        """
        bytes_per_frame = self.sample_width * self.channels
        frames_to_remove = max(0, int((ms / 1000.0) * self.sample_rate))
        return audio_bytes[frames_to_remove * bytes_per_frame:]
--- a/src/wav_recorder.py
+++ b/src/wav_recorder.py
@ -0,0 +1,40 @@
 from wav_composer import WavComposer
 import datetime
 class WavRecorder:
    """Accumulate a header and data chunks and hand them to a WAV composer.

    On save, the header is prepended verbatim to the joined chunks and the
    result is passed to ``compose(target, data)`` of the composer.
    """
    def __init__(self, output_directory, composer=None, sample_rate=16000):
        """
        Args:
            output_directory: Directory used when saving without a buffer.
            composer: Object with a ``compose(target, audio_bytes)`` method;
                a ``WavComposer(sample_rate)`` is created on demand when omitted.
            sample_rate: Sample rate for the on-demand composer.
        """
        self._header = b''
        self._data_chunks = []
        self._output_directory = output_directory
        self._is_recording = False
        self._composer = composer
        self._sample_rate = sample_rate
    @staticmethod
    def _as_bytes(payload):
        """Return *payload* as ``bytes``; raise ``TypeError`` for unsupported types."""
        if isinstance(payload, (bytes, bytearray, memoryview)):
            return bytes(payload)
        if isinstance(payload, str):
            # NOTE(review): the HTTP routes pass JSON strings, which cannot be
            # joined with bytes.  They are assumed to be base64, like the
            # 'content' field decoded by the /aid/transcript route; confirm
            # against the sending client.
            import base64
            return base64.b64decode(payload)
        raise TypeError(f'Expected bytes or base64 text, got {type(payload).__name__}')
    def start_recording(self):
        """Allow chunks to be appended."""
        self._is_recording = True
    def set_header(self, header):
        """Remember the header that is prepended to the data on save."""
        self._header = self._as_bytes(header)
    def append_data(self, data):
        """Append one chunk.

        Raises:
            RuntimeError: If ``start_recording`` has not been called.
        """
        if not self._is_recording:
            raise RuntimeError("Cannot append data. Recorder is not recording.")
        self._data_chunks.append(self._as_bytes(data))
    def save_recording(self, buf=None):
        """Write the recording to *buf* (or to a new file) and reset the recorder."""
        self._write_to_file(buf)
        self._reset_recorder()
    def _write_to_file(self, buf=None):
        """Compose header and chunks into *buf*, or into a timestamped file when *buf* is None."""
        wav_data = self._header + b''.join(self._data_chunks)
        if buf is not None:
            output_path = buf
        else:
            filename = datetime.datetime.now().strftime('recording_%d_%m_%Y__%H_%M_%S.wav')
            output_path = f'{self._output_directory}/{filename}'
        # compose() is an instance method, so an instance is required.
        composer = self._composer if self._composer is not None else WavComposer(self._sample_rate)
        composer.compose(output_path, wav_data)
        print(f'Saved recording to {output_path}')
    def _reset_recorder(self):
        """Drop header and chunks and leave recording mode."""
        self._header = b''
        self._data_chunks.clear()
        self._is_recording = False
    def is_recording(self):
        """Return True between ``start_recording`` and the next save."""
        return self._is_recording