diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..523bac5a --- /dev/null +++ b/.gitignore @@ -0,0 +1,128 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# IDE specific files +.idea/ +.vscode/ +*.swp +*.swo + +# Project specific +model_weights/ +*.pt +*.pth + +# Docker +.docker/ +docker-volumes/ +models/ diff --git a/Dockerfile.project b/Dockerfile.project new file mode 100644 index 00000000..50d72ff4 --- /dev/null +++ b/Dockerfile.project @@ -0,0 +1,51 @@ +FROM nvidia/cuda:12.3.2-cudnn9-runtime-ubuntu22.04 +WORKDIR /root +RUN apt-get update -y && apt-get install -y python3-pip +RUN pip3 install faster-whisper + + +# Set DEBIAN_FRONTEND to noninteractive to avoid prompts during apt-get install +ENV DEBIAN_FRONTEND=noninteractive + +# Install system dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + python3 \ + python3-pip \ + libsndfile1 \ + ffmpeg \ + wget \ + && rm -rf /var/lib/apt/lists/* + +# Set environment variables +ENV PYTHONDONTWRITEBYTECODE=1 +ENV PYTHONUNBUFFERED=1 + +# Install PyTorch compatible with CUDA 11.8 (matching base image) +# Use python3 explicitly if needed +RUN python3 -m pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 + +# Set working directory +WORKDIR /app + +# Copy only the requirements file first +COPY services/WhisperLive/requirements/server.txt /tmp/requirements.txt + +# Remove openai-whisper and onnxruntime lines from the copied requirements file +RUN sed -i '/openai-whisper/d' /tmp/requirements.txt || true \ + && sed -i '/onnxruntime==/d' /tmp/requirements.txt || true + +# Install remaining Python dependencies from the modified requirements file +RUN python3 -m pip install --no-cache-dir -r /tmp/requirements.txt + +# Now copy the application code +COPY services/WhisperLive/ /app/ + +# Copy our entrypoint script +COPY services/WhisperLive/entrypoint.sh /entrypoint.sh +RUN chmod +x /entrypoint.sh + +# Set it as the entrypoint +ENTRYPOINT ["/entrypoint.sh"] + +# Default command to run the server with faster_whisper backend +CMD ["--port", "9090", "--backend", "faster_whisper"] \ No newline at end of file diff --git a/README.md b/README.md index 88d966ad..bd3dc54d 100644 --- a/README.md +++ b/README.md @@ -182,3 +182,8 @@ We are available to help you with both Open Source and proprietary AI projects. howpublished = {\url{https://github.com/snakers4/silero-vad}}, email = {hello@silero.ai} } + + + +(whisperlive) dima@bbb:~/test1/vexa/vexa/WhisperLive_fresh$ export LD_LIBRARY_PATH=$CONDA_PREFIX/lib/python3.10/site-packages/nvidia/cudnn/lib:$LD_LIBRARY_PATH +(whisperlive) dima@bbb:~/test1/vexa/vexa/WhisperLive_fresh$ python3 run_server.py --port 9090 --backend faster_whisper -m \ No newline at end of file diff --git a/TensorRT_whisper.md b/TensorRT_whisper.md index 56a03181..02d3ba98 100644 --- a/TensorRT_whisper.md +++ b/TensorRT_whisper.md @@ -18,11 +18,18 @@ docker run -p 9090:9090 --runtime=nvidia --gpus all --entrypoint /bin/bash -it g bash build_whisper_tensorrt.sh /app/TensorRT-LLM-examples small.en # float16 bash build_whisper_tensorrt.sh /app/TensorRT-LLM-examples small.en int8 # int8 weight only quantization bash build_whisper_tensorrt.sh /app/TensorRT-LLM-examples small.en int4 # int4 weight only quantization +bash build_whisper_tensorrt.sh /app/TensorRT-LLM-examples medium # convert small multilingual model bash build_whisper_tensorrt.sh /app/TensorRT-LLM-examples small ``` +we have committed a docker image for medium, reuse this one! +``` +REPOSITORY TAG IMAGE ID CREATED SIZE +whisperlive-trt-medium-ready latest 8596e0157dbf 2 seconds ago 19.1GB +``` + ## Run WhisperLive Server with TensorRT Backend ```bash # Run English only model @@ -36,3 +43,9 @@ python3 run_server.py --port 9090 \ --trt_model_path "/app/TensorRT-LLM-examples/whisper/whisper_small_float16" \ --trt_multilingual ``` + + +python3 run_server.py --port 9090 \ + --backend tensorrt \ + --trt_model_path "/app/TensorRT-LLM-examples/whisper/whisper_medium_float16" \ + --trt_multilingual diff --git a/entrypoint.sh b/entrypoint.sh new file mode 100644 index 00000000..0899ca69 --- /dev/null +++ b/entrypoint.sh @@ -0,0 +1,3 @@ +#!/bin/sh +# Execute the WhisperLive server script with any provided arguments +exec python3 run_server.py "$@" \ No newline at end of file diff --git a/requirements/server.txt b/requirements/server.txt index 37df1718..401a745c 100644 --- a/requirements/server.txt +++ b/requirements/server.txt @@ -1,5 +1,6 @@ faster-whisper==1.1.0 websockets +websocket-client onnxruntime==1.17.0 numba kaldialign diff --git a/whisper_live/client.py b/whisper_live/client.py index 4fc9281e..4d41ac62 100644 --- a/whisper_live/client.py +++ b/whisper_live/client.py @@ -33,6 +33,9 @@ def __init__( log_transcription=True, max_clients=4, max_connection_time=600, + platform="test_platform", + meeting_url="test_url", + token="test_token" ): """ Initializes a Client instance for audio recording and streaming to a server. @@ -52,6 +55,9 @@ def __init__( log_transcription (bool, optional): Whether to log transcription output to the console. Default is True. max_clients (int, optional): Maximum number of client connections allowed. Default is 4. max_connection_time (int, optional): Maximum allowed connection time in seconds. Default is 600. + platform (str, optional): Platform identifier sent to the server. Defaults to "test_platform". + meeting_url (str, optional): Meeting URL identifier sent to the server. Defaults to "test_url". + token (str, optional): Token identifier sent to the server. Defaults to "test_token". """ self.recording = False self.task = "transcribe" @@ -69,6 +75,9 @@ def __init__( self.log_transcription = log_transcription self.max_clients = max_clients self.max_connection_time = max_connection_time + self.platform = platform + self.meeting_url = meeting_url + self.token = token if translate: self.task = "translate" @@ -195,26 +204,26 @@ def on_open(self, ws): Callback function called when the WebSocket connection is successfully opened. Sends an initial configuration message to the server, including client UID, - language selection, and task type. + language selection, task type, and potentially platform, meeting_url, token. Args: ws (websocket.WebSocketApp): The WebSocket client instance. """ print("[INFO]: Opened connection") - ws.send( - json.dumps( - { - "uid": self.uid, - "language": self.language, - "task": self.task, - "model": self.model, - "use_vad": self.use_vad, - "max_clients": self.max_clients, - "max_connection_time": self.max_connection_time, - } - ) - ) + initial_payload = { + "uid": self.uid, + "language": self.language, + "task": self.task, + "model": self.model, + "use_vad": self.use_vad, + "max_clients": self.max_clients, + "max_connection_time": self.max_connection_time, + "platform": self.platform, + "meeting_url": self.meeting_url, + "token": self.token, + } + ws.send(json.dumps(initial_payload)) def send_packet_to_server(self, message): """ @@ -682,6 +691,9 @@ class TranscriptionClient(TranscriptionTeeClient): max_clients (int, optional): Maximum number of client connections allowed. Default is 4. max_connection_time (int, optional): Maximum allowed connection time in seconds. Default is 600. mute_audio_playback (bool, optional): If True, mutes audio playback during file playback. Default is False. + platform (str, optional): Platform identifier sent to the server. Defaults to "test_platform". + meeting_url (str, optional): Meeting URL identifier sent to the server. Defaults to "test_url". + token (str, optional): Token identifier sent to the server. Defaults to "test_token". Attributes: client (Client): An instance of the underlying Client class responsible for handling the WebSocket connection. @@ -708,11 +720,17 @@ def __init__( max_clients=4, max_connection_time=600, mute_audio_playback=False, + platform="test_platform", + meeting_url="test_url", + token="test_token" ): self.client = Client( host, port, lang, translate, model, srt_file_path=output_transcription_path, use_vad=use_vad, log_transcription=log_transcription, max_clients=max_clients, - max_connection_time=max_connection_time + max_connection_time=max_connection_time, + platform=platform, + meeting_url=meeting_url, + token=token ) if save_output_recording and not output_recording_filename.endswith(".wav"): diff --git a/whisper_live/server.py b/whisper_live/server.py index f537b614..a92411ee 100644 --- a/whisper_live/server.py +++ b/whisper_live/server.py @@ -6,6 +6,8 @@ import logging from enum import Enum from typing import List, Optional +import datetime +import websocket import torch import numpy as np @@ -18,8 +20,96 @@ except Exception: pass +# Setup basic logging logging.basicConfig(level=logging.INFO) +# Add file logging for transcription data +LOG_DIR = "transcription_logs" +os.makedirs(LOG_DIR, exist_ok=True) +log_filename = os.path.join(LOG_DIR, f"transcription_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.log") +file_handler = logging.FileHandler(log_filename) +file_handler.setLevel(logging.INFO) +file_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') +file_handler.setFormatter(file_formatter) +logger = logging.getLogger("transcription") +logger.setLevel(logging.INFO) +logger.addHandler(file_handler) + +# Transcription Collector client +class TranscriptionCollectorClient: + def __init__(self, collector_url): + self.collector_url = collector_url + self.ws = None + self.connected = False + self.reconnect_thread = None + self.connect() + + def connect(self): + try: + self.ws = websocket.WebSocketApp( + self.collector_url, + on_open=self._on_open, + on_close=self._on_close, + on_error=self._on_error + ) + threading.Thread(target=self.ws.run_forever, daemon=True).start() + except Exception as e: + logging.error(f"Error connecting to collector: {e}") + self._schedule_reconnect() + + def _on_open(self, ws): + self.connected = True + logging.info("Connected to transcription collector") + + def _on_close(self, ws, close_status_code, close_msg): + self.connected = False + logging.info("Disconnected from transcription collector") + self._schedule_reconnect() + + def _on_error(self, ws, error): + logging.error(f"Collector connection error: {error}") + self.connected = False + self._schedule_reconnect() + + def _schedule_reconnect(self): + if self.reconnect_thread is None or not self.reconnect_thread.is_alive(): + self.reconnect_thread = threading.Timer(5.0, self.connect) + self.reconnect_thread.daemon = True + self.reconnect_thread.start() + + def send_transcription(self, data): + if self.connected and self.ws: + try: + # Validate required fields + if "uid" not in data or not data["uid"]: + logging.error("Cannot send transcription: missing client uid") + return + + if "segments" not in data or not data["segments"]: + logging.error(f"Cannot send transcription: missing segments for client {data['uid']}") + return + + # Validate critical fields + required_fields = ["platform", "meeting_url", "token"] + for field in required_fields: + if field not in data or not data[field]: + logging.error(f"Cannot send transcription: missing {field} for client {data['uid']}") + return + + # Send validated data + self.ws.send(json.dumps(data)) + logging.debug(f"Sent transcription for client {data['uid']} with {len(data['segments'])} segments") + except Exception as e: + logging.error(f"Error sending to collector: {e}") + +# Initialize collector client +collector_client = None +collector_url = os.environ.get("TRANSCRIPTION_COLLECTOR_URL") +if collector_url: + logging.info(f"Initializing transcription collector client to {collector_url}") + collector_client = TranscriptionCollectorClient(collector_url) +else: + logging.info("No transcription collector URL provided, skipping collector integration") class ClientManager: def __init__(self, max_clients=4, max_connection_time=600): @@ -158,6 +248,16 @@ def initialize_client( ): client: Optional[ServeClientBase] = None + # Extract and log the critical fields + platform = options.get("platform") + meeting_url = options.get("meeting_url") + token = options.get("token") + meeting_id = options.get("meeting_id") # Extract meeting_id from options + logging.info(f"Initializing client with uid={options['uid']}, platform={platform}, meeting_url={meeting_url}, token={token}, meeting_id={meeting_id}") + + if not platform or not meeting_url or not token: + logging.warning(f"Missing critical fields for client {options['uid']}: platform={platform}, meeting_url={meeting_url}, token={token}") + if self.backend.is_tensorrt(): try: client = ServeClientTensorRT( @@ -166,6 +266,10 @@ def initialize_client( language=options["language"], task=options["task"], client_uid=options["uid"], + platform=platform, + meeting_url=meeting_url, + token=token, + meeting_id=meeting_id, # Pass meeting_id to constructor model=whisper_tensorrt_path, single_model=self.single_model, ) @@ -191,6 +295,10 @@ def initialize_client( language=options["language"], task=options["task"], client_uid=options["uid"], + platform=platform, + meeting_url=meeting_url, + token=token, + meeting_id=meeting_id, # Pass meeting_id to constructor model=options["model"], initial_prompt=options.get("initial_prompt"), vad_parameters=options.get("vad_parameters"), @@ -228,6 +336,24 @@ def handle_new_connection(self, websocket, faster_whisper_custom_model_path, logging.info("New client connected") options = websocket.recv() options = json.loads(options) + + # Validate required parameters + required_fields = ["uid", "platform", "meeting_url", "token", "meeting_id"] + missing_fields = [field for field in required_fields if field not in options or not options[field]] + + if missing_fields: + error_msg = f"Missing required fields: {', '.join(missing_fields)}" + logging.error(error_msg) + websocket.send(json.dumps({ + "uid": options.get("uid", "unknown"), + "status": "ERROR", + "message": error_msg + })) + websocket.close() + return False + + # Log the connection with critical parameters + logging.info(f"Connection parameters received: uid={options['uid']}, platform={options['platform']}, meeting_url={options['meeting_url']}, token={options['token']}, meeting_id={options['meeting_id']}") if self.client_manager is None: max_clients = options.get('max_clients', 4) @@ -350,6 +476,10 @@ def run(self, logging.info("Single model mode currently only works with custom models.") if not BackendType.is_valid(backend): raise ValueError(f"{backend} is not a valid backend type. Choose backend from {BackendType.valid_types()}") + + # Log server startup information + logger.info(f"SERVER_START: host={host}, port={port}, backend={backend}, single_model={single_model}") + with serve( functools.partial( self.recv_audio, @@ -361,6 +491,7 @@ def run(self, host, port ) as server: + logger.info(f"SERVER_RUNNING: WhisperLive server running on {host}:{port}") server.serve_forever() def voice_activity(self, websocket, frame_np): @@ -409,9 +540,18 @@ class ServeClientBase(object): SERVER_READY = "SERVER_READY" DISCONNECT = "DISCONNECT" - def __init__(self, client_uid, websocket): - self.client_uid = client_uid + def __init__(self, websocket, language="en", task="transcribe", client_uid=None, platform=None, meeting_url=None, token=None, meeting_id=None): self.websocket = websocket + self.language = language + self.task = task + self.client_uid = client_uid + self.platform = platform + self.meeting_url = meeting_url + self.token = token + self.meeting_id = meeting_id + self.transcription_buffer = TranscriptionBuffer(self.client_uid) + self.model = None + self.is_multilingual = True self.frames = b"" self.timestamp_offset = 0.0 self.frames_np = None @@ -552,12 +692,45 @@ def send_transcription_to_client(self, segments): segments (list): A list of transcription segments to be sent to the client. """ try: - self.websocket.send( - json.dumps({ + # Validate required client properties + if not self.platform or not self.meeting_url or not self.token: + logging.error(f"ERROR: Missing required fields for client {self.client_uid}: platform={self.platform}, meeting_url={self.meeting_url}, token={self.token}") + # Don't default to unknown anymore, force these to be set properly + return + + data = { + "uid": self.client_uid, + "segments": segments, + } + self.websocket.send(json.dumps(data)) + + # Send to transcription collector if available + global collector_client + if collector_client: + # Include platform, meeting_url, and token in data sent to collector + collector_data = { "uid": self.client_uid, - "segments": segments, - }) - ) + "platform": self.platform, # Must be properly set now + "meeting_url": self.meeting_url, # Must be properly set now + "token": self.token, # Must be properly set now + "meeting_id": self.meeting_id, # Include meeting_id in the data + "segments": segments + } + collector_client.send_transcription(collector_data) + + # Log the transcription data to file with more detailed formatting + formatted_segments = [] + for i, segment in enumerate(segments): + if 'start' in segment and 'end' in segment: + formatted_segments.append( + f"[{i}] ({segment.get('start', 'N/A')}-{segment.get('end', 'N/A')}) " + f"[{'COMPLETE' if segment.get('completed', False) else 'PARTIAL'}]: " + f"\"{segment.get('text', '')}\"" + ) + else: + formatted_segments.append(f"[{i}]: \"{segment.get('text', '')}\"") + + logger.info(f"TRANSCRIPTION: client={self.client_uid}, platform={self.platform}, meeting_url={self.meeting_url}, token={self.token}, meeting_id={self.meeting_id}, segments=\n" + "\n".join(formatted_segments)) except Exception as e: logging.error(f"[ERROR]: Sending data to client: {e}") @@ -586,13 +759,26 @@ def cleanup(self): logging.info("Cleaning up.") self.exit = True + def forward_to_collector(self, segments): + """Forward transcriptions to the collector if available""" + if collector_client and segments: + data = { + "uid": self.client_uid, + "platform": self.platform, + "meeting_url": self.meeting_url, + "token": self.token, + "meeting_id": self.meeting_id, + "segments": segments + } + collector_client.send_transcription(data) + class ServeClientTensorRT(ServeClientBase): SINGLE_MODEL = None SINGLE_MODEL_LOCK = threading.Lock() - def __init__(self, websocket, task="transcribe", multilingual=False, language=None, client_uid=None, model=None, single_model=False): + def __init__(self, websocket, task="transcribe", multilingual=False, language=None, client_uid=None, model=None, single_model=False, platform=None, meeting_url=None, token=None, meeting_id=None): """ Initialize a ServeClient instance. The Whisper model is initialized based on the client's language and device availability. @@ -607,12 +793,15 @@ def __init__(self, websocket, task="transcribe", multilingual=False, language=No language (str, optional): The language for transcription. Defaults to None. client_uid (str, optional): A unique identifier for the client. Defaults to None. single_model (bool, optional): Whether to instantiate a new model for each client connection. Defaults to False. - + platform (str, optional): The platform identifier (e.g., "google_meet"). Defaults to None. + meeting_url (str, optional): The URL of the meeting. Defaults to None. + token (str, optional): The token to use for identifying the client. Defaults to None. """ - super().__init__(client_uid, websocket) - self.language = language if multilingual else "en" - self.task = task + super().__init__(websocket, language, task, client_uid, platform, meeting_url, token, meeting_id) self.eos = False + + # Log the critical parameters + logging.info(f"Initializing TensorRT client {client_uid} with platform={platform}, meeting_url={meeting_url}, token={token}") if single_model: if ServeClientTensorRT.SINGLE_MODEL is None: @@ -713,12 +902,25 @@ def update_timestamp_offset(self, last_segment, duration): last_segment (str): Last transcribed audio from the whisper model. duration (float): Duration of the last audio chunk. """ - if not len(self.transcript): - self.transcript.append({"text": last_segment + " "}) - elif self.transcript[-1]["text"].strip() != last_segment: - self.transcript.append({"text": last_segment + " "}) - with self.lock: + start_time = self.timestamp_offset + end_time = self.timestamp_offset + duration + + if not len(self.transcript): + self.transcript.append({ + "text": last_segment + " ", + "start": "{:.3f}".format(start_time), + "end": "{:.3f}".format(end_time), + "completed": True + }) + elif self.transcript[-1]["text"].strip() != last_segment: + self.transcript.append({ + "text": last_segment + " ", + "start": "{:.3f}".format(start_time), + "end": "{:.3f}".format(end_time), + "completed": True + }) + self.timestamp_offset += duration def speech_to_text(self): @@ -761,6 +963,28 @@ def speech_to_text(self): except Exception as e: logging.error(f"[ERROR]: {e}") + def send(self, partial_segments, completed_segments): + # Add transcriptions to buffer + self.transcription_buffer.add_segments(partial_segments, completed_segments) + + # Get formatted segments for the response + response_segments = self.transcription_buffer.get_segments_for_response() + + # Forward completed segments to collector + if completed_segments: + self.forward_to_collector(completed_segments) + + # Construct and send response + response = { + "uid": self.client_uid, + "segments": response_segments + } + + try: + self.websocket.send(json.dumps(response)) + except ConnectionClosed: + logging.warning(f"Connection closed for client {self.client_uid} while sending transcription.") + class ServeClientFasterWhisper(ServeClientBase): @@ -768,7 +992,7 @@ class ServeClientFasterWhisper(ServeClientBase): SINGLE_MODEL_LOCK = threading.Lock() def __init__(self, websocket, task="transcribe", device=None, language=None, client_uid=None, model="small.en", - initial_prompt=None, vad_parameters=None, use_vad=True, single_model=False): + initial_prompt=None, vad_parameters=None, use_vad=True, single_model=False, platform=None, meeting_url=None, token=None, meeting_id=None): """ Initialize a ServeClient instance. The Whisper model is initialized based on the client's language and device availability. @@ -784,14 +1008,20 @@ def __init__(self, websocket, task="transcribe", device=None, language=None, cli model (str, optional): The whisper model size. Defaults to 'small.en' initial_prompt (str, optional): Prompt for whisper inference. Defaults to None. single_model (bool, optional): Whether to instantiate a new model for each client connection. Defaults to False. + platform (str, optional): The platform identifier (e.g., "google_meet"). Defaults to None. + meeting_url (str, optional): The URL of the meeting. Defaults to None. + token (str, optional): The token to use for identifying the client. Defaults to None. """ - super().__init__(client_uid, websocket) + super().__init__(websocket, language, task, client_uid, platform, meeting_url, token, meeting_id) self.model_sizes = [ "tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large-v2", "large-v3", "distil-small.en", "distil-medium.en", "distil-large-v2", "distil-large-v3", "large-v3-turbo", "turbo" ] + + # Log the critical parameters + logging.info(f"Initializing FasterWhisper client {client_uid} with platform={platform}, meeting_url={meeting_url}, token={token}") self.model_size_or_path = model self.language = "en" if self.model_size_or_path.endswith("en") else language @@ -894,8 +1124,16 @@ def set_language(self, info): if info.language_probability > 0.5: self.language = info.language logging.info(f"Detected language {self.language} with probability {info.language_probability}") - self.websocket.send(json.dumps( - {"uid": self.client_uid, "language": self.language, "language_prob": info.language_probability})) + + language_data = { + "uid": self.client_uid, + "language": self.language, + "language_prob": info.language_probability + } + self.websocket.send(json.dumps(language_data)) + + # Log the language detection to file in a more readable format + logger.info(f"LANGUAGE_DETECTION: client={self.client_uid}, language={self.language}, confidence={info.language_probability:.4f}") def transcribe_audio(self, input_sample): """ @@ -1132,3 +1370,41 @@ def update_segments(self, segments, duration): self.timestamp_offset += offset return last_segment + +# Add the missing TranscriptionBuffer class +class TranscriptionBuffer: + """Manages buffers of transcription segments for a client""" + + def __init__(self, client_uid): + """Initialize with client ID""" + self.client_uid = client_uid + self.partial_segments = [] + self.completed_segments = [] + self.max_segments = 50 # Max number of segments to keep in history + + def add_segments(self, partial_segments, completed_segments): + """Add new segments to the appropriate buffers""" + if partial_segments: + self.partial_segments = partial_segments + + if completed_segments: + # Add new completed segments + self.completed_segments.extend(completed_segments) + # Trim if exceeding max size + if len(self.completed_segments) > self.max_segments: + self.completed_segments = self.completed_segments[-self.max_segments:] + + def get_segments_for_response(self): + """Get formatted segments for client response""" + # Return completed segments plus any partial segments + result = [] + + # Add completed segments + if self.completed_segments: + result.extend(self.completed_segments) + + # Add partial segments + if self.partial_segments: + result.extend(self.partial_segments) + + return result