LocalAI/backend/python/whisperx/test.py

"""
A test script to test the gRPC service for WhisperX transcription
"""
import unittest
import subprocess
import time
import os
import tempfile
import shutil
import backend_pb2
import backend_pb2_grpc

import grpc


class TestBackendServicer(unittest.TestCase):
    """
    Integration tests for the WhisperX gRPC backend.

    unittest invokes setUp() before and tearDown() after every test method,
    so each test talks to its own freshly started backend process. The test
    methods must not call those hooks themselves: doing so would start a
    second server on the same address and overwrite ``self.service``,
    leaving the first process running after the test ends.
    """

    # Address the backend is started on and the tests connect to.
    ADDRESS = "localhost:50051"
    # Seconds to wait after spawning the backend before issuing RPCs.
    STARTUP_DELAY = 10
    # Sample recording used by the transcription test.
    AUDIO_URL = "https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav"

    def setUp(self):
        """
        Start the backend server in a subprocess and give it time to boot.
        """
        self.service = subprocess.Popen(
            ["python3", "backend.py", "--addr", self.ADDRESS]
        )
        time.sleep(self.STARTUP_DELAY)

    def tearDown(self) -> None:
        """
        Terminate the backend subprocess and wait for it to exit.
        """
        self.service.terminate()
        self.service.wait()

    def test_server_startup(self):
        """
        The Health RPC answers with b'OK' once the server is running.
        """
        try:
            with grpc.insecure_channel(self.ADDRESS) as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.Health(backend_pb2.HealthMessage())
                self.assertEqual(response.message, b'OK')
        except grpc.RpcError as err:
            # Only transport/RPC errors are translated; assertion failures
            # propagate untouched so their own message is reported.
            self.fail(f"Server failed to start: {err}")

    def test_load_model(self):
        """
        LoadModel succeeds for the "tiny" model and reports success.
        """
        try:
            with grpc.insecure_channel(self.ADDRESS) as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.LoadModel(backend_pb2.ModelOptions(Model="tiny"))
                self.assertTrue(response.success)
                self.assertEqual(response.message, "Model loaded successfully")
        except grpc.RpcError as err:
            self.fail(f"LoadModel service failed: {err}")

    def test_audio_transcription(self):
        """
        AudioTranscription returns non-empty text and at least one segment
        for a downloaded sample recording.
        """
        # Create a temporary directory for the audio file. The cleanup is
        # registered immediately so the directory is removed however the
        # test ends (cleanups run after tearDown).
        temp_dir = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)
        audio_file = os.path.join(temp_dir, 'audio.wav')

        # Download the audio file to the temporary directory
        print(f"Downloading audio file to {audio_file}...")
        result = subprocess.run(
            ["wget", "-q", self.AUDIO_URL, "-O", audio_file],
            capture_output=True,
            text=True
        )
        self.assertEqual(
            result.returncode, 0,
            f"Failed to download audio file: {result.stderr}"
        )

        # Verify the file was downloaded
        self.assertTrue(
            os.path.exists(audio_file),
            f"Audio file was not downloaded to {audio_file}"
        )

        try:
            with grpc.insecure_channel(self.ADDRESS) as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                # Load the model first
                load_response = stub.LoadModel(backend_pb2.ModelOptions(Model="tiny"))
                self.assertTrue(load_response.success)

                # Perform transcription without diarization
                transcript_request = backend_pb2.TranscriptRequest(dst=audio_file)
                transcript_response = stub.AudioTranscription(transcript_request)

                # Print the transcribed text for debugging
                print(f"Transcribed text: {transcript_response.text}")
                print(f"Number of segments: {len(transcript_response.segments)}")

                # Verify response structure
                self.assertIsNotNone(transcript_response)
                self.assertIsNotNone(transcript_response.text)
                self.assertGreater(len(transcript_response.text), 0)
                self.assertGreater(len(transcript_response.segments), 0)

                # Verify the first segment carries text and an integer id
                segment = transcript_response.segments[0]
                self.assertIsNotNone(segment.text)
                self.assertIsInstance(segment.id, int)
        except grpc.RpcError as err:
            self.fail(f"AudioTranscription service failed: {err}")
feat(whisperx): add whisperx backend for transcription with speaker diarization (#8299)

* feat(proto): add speaker field to TranscriptSegment for diarization

  Add speaker field to the gRPC TranscriptSegment message and map it through the Go schema, enabling backends to return speaker labels.

  Signed-off-by: eureka928 <meobius123@gmail.com>

* feat(whisperx): add whisperx backend for transcription with diarization

  Add Python gRPC backend using WhisperX for speech-to-text with word-level timestamps, forced alignment, and speaker diarization via pyannote-audio when HF_TOKEN is provided.

  Signed-off-by: eureka928 <meobius123@gmail.com>

* feat(whisperx): register whisperx backend in Makefile

  Signed-off-by: eureka928 <meobius123@gmail.com>

* feat(whisperx): add whisperx meta and image entries to index.yaml

  Signed-off-by: eureka928 <meobius123@gmail.com>

* ci(whisperx): add build matrix entries for CPU, CUDA 12/13, and ROCm

  Signed-off-by: eureka928 <meobius123@gmail.com>

* fix(whisperx): unpin torch versions and use CPU index for cpu requirements

  Address review feedback:
  - Use --extra-index-url for CPU torch wheels to reduce size
  - Remove torch version pins, let uv resolve compatible versions

  Signed-off-by: eureka928 <meobius123@gmail.com>

* fix(whisperx): pin torch ROCm variant to fix CI build failure

  Signed-off-by: eureka928 <meobius123@gmail.com>

* fix(whisperx): pin torch CPU variant to fix uv resolution failure

  Pin torch==2.8.0+cpu so uv resolves the CPU wheel from the extra index instead of picking torch==2.8.0+cu128 from PyPI, which pulls unresolvable CUDA dependencies.

  Signed-off-by: eureka928 <meobius123@gmail.com>

* fix(whisperx): use unsafe-best-match index strategy to fix uv resolution failure

  uv's default first-match strategy finds torch on PyPI before checking the extra index, causing it to pick torch==2.8.0+cu128 instead of the CPU variant. This makes whisperx's transitive torch dependency unresolvable. Using unsafe-best-match lets uv consider all indexes.

  Signed-off-by: eureka928 <meobius123@gmail.com>

* fix(whisperx): drop +cpu local version suffix to fix uv resolution failure

  PEP 440 ==2.8.0 matches 2.8.0+cpu from the extra index, avoiding the issue where uv cannot locate an explicit +cpu local version specifier. This aligns with the pattern used by all other CPU backends.

  Signed-off-by: eureka928 <meobius123@gmail.com>

* fix(backends): drop +rocm local version suffixes from hipblas requirements to fix uv resolution

  uv cannot resolve PEP 440 local version specifiers (e.g. +rocm6.4, +rocm6.3) in pinned requirements. The --extra-index-url already points to the correct ROCm wheel index and --index-strategy unsafe-best-match (set in libbackend.sh) ensures the ROCm variant is preferred. Applies the same fix as 7f5d72e8 (which resolved this for +cpu) across all 14 hipblas requirements files.

  Signed-off-by: eureka928 <meobius123@gmail.com>
  Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
  Signed-off-by: eureka928 <meobius123@gmail.com>

* revert: scope hipblas suffix fix to whisperx only

  Reverts changes to non-whisperx hipblas requirements files per maintainer review — other backends are building fine with the +rocm local version suffix.

  Signed-off-by: eureka928 <meobius123@gmail.com>
  Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
  Signed-off-by: eureka928 <meobius123@gmail.com>

---------

Signed-off-by: eureka928 <meobius123@gmail.com>
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>

2026-02-02 15:33:12 +00:00
			`"""`
			`A test script to test the gRPC service for WhisperX transcription`
			`"""`
			`import unittest`
			`import subprocess`
			`import time`
			`import os`
			`import tempfile`
			`import shutil`
			`import backend_pb2`
			`import backend_pb2_grpc`

			`import grpc`


			`class TestBackendServicer(unittest.TestCase):`
			`"""`
			`TestBackendServicer is the class that tests the gRPC service`
			`"""`
			`def setUp(self):`
			`"""`
			`This method sets up the gRPC service by starting the server`
			`"""`
			`self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])`
			`time.sleep(10)`

			`def tearDown(self) -> None:`
			`"""`
			`This method tears down the gRPC service by terminating the server`
			`"""`
			`self.service.terminate()`
			`self.service.wait()`

			`def test_server_startup(self):`
			`"""`
			`This method tests if the server starts up successfully`
			`"""`
			`try:`
			`self.setUp()`
			`with grpc.insecure_channel("localhost:50051") as channel:`
			`stub = backend_pb2_grpc.BackendStub(channel)`
			`response = stub.Health(backend_pb2.HealthMessage())`
			`self.assertEqual(response.message, b'OK')`
			`except Exception as err:`
			`print(err)`
			`self.fail("Server failed to start")`
			`finally:`
			`self.tearDown()`

			`def test_load_model(self):`
			`"""`
			`This method tests if the model is loaded successfully`
			`"""`
			`try:`
			`self.setUp()`
			`with grpc.insecure_channel("localhost:50051") as channel:`
			`stub = backend_pb2_grpc.BackendStub(channel)`
			`response = stub.LoadModel(backend_pb2.ModelOptions(Model="tiny"))`
			`self.assertTrue(response.success)`
			`self.assertEqual(response.message, "Model loaded successfully")`
			`except Exception as err:`
			`print(err)`
			`self.fail("LoadModel service failed")`
			`finally:`
			`self.tearDown()`

			`def test_audio_transcription(self):`
			`"""`
			`This method tests if audio transcription works successfully`
			`"""`
			`# Create a temporary directory for the audio file`
			`temp_dir = tempfile.mkdtemp()`
			`audio_file = os.path.join(temp_dir, 'audio.wav')`

			`try:`
			`# Download the audio file to the temporary directory`
			`print(f"Downloading audio file to {audio_file}...")`
			`url = "https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav"`
			`result = subprocess.run(`
			`["wget", "-q", url, "-O", audio_file],`
			`capture_output=True,`
			`text=True`
			`)`
			`if result.returncode != 0:`
			`self.fail(f"Failed to download audio file: {result.stderr}")`

			`# Verify the file was downloaded`
			`if not os.path.exists(audio_file):`
			`self.fail(f"Audio file was not downloaded to {audio_file}")`

			`self.setUp()`
			`with grpc.insecure_channel("localhost:50051") as channel:`
			`stub = backend_pb2_grpc.BackendStub(channel)`
			`# Load the model first`
			`load_response = stub.LoadModel(backend_pb2.ModelOptions(Model="tiny"))`
			`self.assertTrue(load_response.success)`

			`# Perform transcription without diarization`
			`transcript_request = backend_pb2.TranscriptRequest(dst=audio_file)`
			`transcript_response = stub.AudioTranscription(transcript_request)`

			`# Print the transcribed text for debugging`
			`print(f"Transcribed text: {transcript_response.text}")`
			`print(f"Number of segments: {len(transcript_response.segments)}")`

			`# Verify response structure`
			`self.assertIsNotNone(transcript_response)`
			`self.assertIsNotNone(transcript_response.text)`
			`self.assertGreater(len(transcript_response.text), 0)`
			`self.assertGreater(len(transcript_response.segments), 0)`

			`# Verify segments have timing info`
			`segment = transcript_response.segments[0]`
			`self.assertIsNotNone(segment.text)`
			`self.assertIsInstance(segment.id, int)`

			`except Exception as err:`
			`print(err)`
			`self.fail("AudioTranscription service failed")`
			`finally:`
			`self.tearDown()`
			`# Clean up the temporary directory`
			`if os.path.exists(temp_dir):`
			`shutil.rmtree(temp_dir)`