LocalAI/backend/python/faster-whisper/backend.py

#!/usr/bin/env python3
"""
This is an extra gRPC server of LocalAI for Faster Whisper speech-to-text (audio transcription)
"""
from concurrent import futures
import time
import argparse
import signal
import sys
import os
import backend_pb2
import backend_pb2_grpc
import torch
from faster_whisper import WhisperModel

import grpc
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'common'))
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'common'))
from grpc_auth import get_auth_interceptors


# Length (in seconds) of each sleep used to keep the main thread parked while
# the gRPC server runs on its worker threads.
_ONE_DAY_IN_SECONDS = 60 * 60 * 24

# Size of the gRPC thread pool: taken from the PYTHON_GRPC_MAX_WORKERS
# environment variable when set, otherwise 1.
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
# NOTE(review): COQUI_LANGUAGE is read from the environment but is not
# referenced anywhere in the visible part of this file - confirm whether it
# is still needed.
COQUI_LANGUAGE = os.environ.get('COQUI_LANGUAGE', None)

# Implement the BackendServicer class with the service methods
class BackendServicer(backend_pb2_grpc.BackendServicer):
    """
    gRPC servicer that exposes a faster-whisper model for audio transcription.

    ``LoadModel`` is the only method that sets ``self.model``, so it must have
    succeeded before ``AudioTranscription`` is called.
    """

    @staticmethod
    def _seconds_to_nanoseconds(seconds):
        """
        Convert a timestamp in seconds to an integer number of nanoseconds.

        The value is scaled *before* being converted to ``int`` so that the
        fractional part of the timestamp is preserved, and the result is a
        true ``int`` rather than a float.
        """
        return int(round(seconds * 1e9))

    def Health(self, request, context):
        """Liveness probe: always replies with the bytes ``OK``."""
        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))

    def LoadModel(self, request, context):
        """
        Load the Whisper model named by ``request.Model`` into ``self.model``.

        Device selection: ``"cpu"`` by default, ``"cuda"`` when
        ``request.CUDA`` is set, and ``"mps"`` whenever torch reports MPS as
        available (which takes precedence over the CUDA flag).

        Returns:
            ``backend_pb2.Result`` with ``success=True`` when the model was
            constructed, or ``success=False`` carrying the error description
            when construction raised.
        """
        device = "cuda" if request.CUDA else "cpu"
        # NOTE(review): MPS overrides request.CUDA here. Confirm that the
        # installed faster-whisper build actually accepts device="mps";
        # if it does not, the failure is reported through the Result below.
        if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
            device = "mps"
        try:
            print("Preparing models, please wait", file=sys.stderr)
            self.model = WhisperModel(request.Model, device=device, compute_type="default")
        except Exception as err:
            # Report the failure to the caller instead of failing the RPC.
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
        return backend_pb2.Result(message="Model loaded successfully", success=True)

    def AudioTranscription(self, request, context):
        """
        Transcribe the audio file located at ``request.dst``.

        Each segment produced by the model is logged and converted into a
        ``backend_pb2.TranscriptSegment`` whose ``start``/``end`` are the
        segment timestamps (seconds) scaled to integer nanoseconds.

        Returns:
            ``backend_pb2.TranscriptResult`` holding all segments and the
            concatenation of their texts.

        Raises:
            Any exception raised while transcribing; it is logged to stderr
            and re-raised unchanged.
        """
        result_segments = []
        texts = []
        try:
            segments, _ = self.model.transcribe(request.dst, beam_size=5, condition_on_previous_text=False)
            # The iteration stays inside the try block so that errors raised
            # while consuming the segments are logged as well.
            for index, segment in enumerate(segments):
                print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
                result_segments.append(
                    backend_pb2.TranscriptSegment(
                        id=index,
                        start=self._seconds_to_nanoseconds(segment.start),
                        end=self._seconds_to_nanoseconds(segment.end),
                        text=segment.text,
                    )
                )
                texts.append(segment.text)
        except Exception as err:
            print(f"Unexpected {err=}, {type(err)=}", file=sys.stderr)
            raise

        return backend_pb2.TranscriptResult(segments=result_segments, text="".join(texts))

def serve(address):
    """
    Start the gRPC backend server on ``address`` and block until terminated.

    The server runs on a thread pool of ``MAX_WORKERS`` threads with message
    size limits raised to 50MB, and is wrapped with the interceptors returned
    by ``get_auth_interceptors()``. SIGINT and SIGTERM stop the server
    immediately and exit the process with status 0.

    Args:
        address: ``host:port`` string the server binds to (insecure port).
    """
    max_message_bytes = 50 * 1024 * 1024  # 50MB
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
        options=[
            ('grpc.max_message_length', max_message_bytes),
            ('grpc.max_send_message_length', max_message_bytes),
            ('grpc.max_receive_message_length', max_message_bytes),
        ],
        interceptors=get_auth_interceptors(),
    )
    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
    server.add_insecure_port(address)
    server.start()
    print("Server started. Listening on: " + address, file=sys.stderr)

    def signal_handler(sig, frame):
        # Logged to stderr, like the rest of the server's status messages.
        print("Received termination signal. Shutting down...", file=sys.stderr)
        server.stop(0)
        sys.exit(0)

    # Both termination signals share the same shutdown path.
    for signum in (signal.SIGINT, signal.SIGTERM):
        signal.signal(signum, signal_handler)

    # Park the main thread while the worker threads serve requests. SIGINT is
    # handled by signal_handler above (which raises SystemExit), so no
    # KeyboardInterrupt can surface here.
    while True:
        time.sleep(_ONE_DAY_IN_SECONDS)

if __name__ == "__main__":
    # Script entry point: read the bind address from the command line
    # (defaulting to localhost:50051) and serve until the process is stopped.
    cli = argparse.ArgumentParser(description="Run the gRPC server.")
    cli.add_argument(
        "--addr",
        default="localhost:50051",
        help="The address to bind the server to.",
    )
    serve(cli.parse_args().addr)
feat(faster-whisper): add backend (#4666) Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2025-01-23 07:06:18 +00:00			`#!/usr/bin/env python3`
			`"""`
chore: drop bark which is unmaintained (#8207) Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2026-01-25 08:26:40 +00:00			`This is an extra gRPC server of LocalAI for Faster Whisper TTS`
feat(faster-whisper): add backend (#4666) Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2025-01-23 07:06:18 +00:00			`"""`
			`from concurrent import futures`
			`import time`
			`import argparse`
			`import signal`
			`import sys`
			`import os`
			`import backend_pb2`
			`import backend_pb2_grpc`
feat(mlx): add mlx backend (#6049) * chore: allow to install with pip Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * WIP Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Make the backend to build and actually work Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * List models from system only Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Add script to build darwin python backends Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Run protogen in libbackend Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Detect if mps is available across python backends Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * CI: try to build backend Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Debug CI Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Index mlx-vlm Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Remove mlx-vlm Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Drop CI test Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2025-08-22 06:42:29 +00:00			`import torch`
feat(faster-whisper): add backend (#4666) Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2025-01-23 07:06:18 +00:00			`from faster_whisper import WhisperModel`

			`import grpc`
feat: add distributed mode (#9124) * feat: add distributed mode (experimental) Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix data races, mutexes, transactions Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix events and tool stream in agent chat Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * use ginkgo Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(cron): compute correctly time boundaries avoiding re-triggering Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * enhancements, refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * do not flood of healthy checks Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * do not list obvious backends as text backends Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * tests fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Drop redundant healthcheck Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * enhancements, refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 
2026-03-29 22:47:27 +00:00			`sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'common'))`
			`sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'common'))`
			`from grpc_auth import get_auth_interceptors`

feat(faster-whisper): add backend (#4666) Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2025-01-23 07:06:18 +00:00

			`_ONE_DAY_IN_SECONDS = 60 * 60 * 24`

			`# If MAX_WORKERS are specified in the environment use it, otherwise default to 1`
			`MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))`
			`COQUI_LANGUAGE = os.environ.get('COQUI_LANGUAGE', None)`

			`# Implement the BackendServicer class with the service methods`
			`class BackendServicer(backend_pb2_grpc.BackendServicer):`
			`"""`
			`BackendServicer is the class that implements the gRPC service`
			`"""`
			`def Health(self, request, context):`
			`return backend_pb2.Reply(message=bytes("OK", 'utf-8'))`
			`def LoadModel(self, request, context):`
			`device = "cpu"`
			`# Get device`
			`# device = "cuda" if request.CUDA else "cpu"`
			`if request.CUDA:`
			`device = "cuda"`
feat(mlx): add mlx backend (#6049) * chore: allow to install with pip Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * WIP Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Make the backend to build and actually work Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * List models from system only Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Add script to build darwin python backends Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Run protogen in libbackend Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Detect if mps is available across python backends Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * CI: try to build backend Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Debug CI Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Index mlx-vlm Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Remove mlx-vlm Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Drop CI test Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2025-08-22 06:42:29 +00:00			`mps_available = hasattr(torch.backends, "mps") and torch.backends.mps.is_available()`
			`if mps_available:`
			`device = "mps"`
feat(faster-whisper): add backend (#4666) Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2025-01-23 07:06:18 +00:00			`try:`
			`print("Preparing models, please wait", file=sys.stderr)`
feat(api): Add transcribe response format request parameter & adjust STT backends (#8318) * WIP response format implementation for audio transcriptions (cherry picked from commit e271dd764bbc13846accf3beb8b6522153aa276f) Signed-off-by: Andres Smith <andressmithdev@pm.me> * Rework transcript response_format and add more formats (cherry picked from commit 6a93a8f63e2ee5726bca2980b0c9cf4ef8b7aeb8) Signed-off-by: Andres Smith <andressmithdev@pm.me> * Add test and replace go-openai package with official openai go client (cherry picked from commit f25d1a04e46526429c89db4c739e1e65942ca893) Signed-off-by: Andres Smith <andressmithdev@pm.me> * Fix faster-whisper backend and refactor transcription formatting to also work on CLI Signed-off-by: Andres Smith <andressmithdev@pm.me> (cherry picked from commit 69a93977d5e113eb7172bd85a0f918592d3d2168) Signed-off-by: Andres Smith <andressmithdev@pm.me> --------- Signed-off-by: Andres Smith <andressmithdev@pm.me> Co-authored-by: nanoandrew4 <nanoandrew4@gmail.com> Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com> 2026-02-01 16:33:17 +00:00			`self.model = WhisperModel(request.Model, device=device, compute_type="default")`
feat(faster-whisper): add backend (#4666) Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2025-01-23 07:06:18 +00:00			`except Exception as err:`
			`return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")`
			`# Implement your logic here for the LoadModel service`
			`# Replace this with your desired response`
			`return backend_pb2.Result(message="Model loaded successfully", success=True)`

			`def AudioTranscription(self, request, context):`
			`resultSegments = []`
			`text = ""`
			`try:`
			`segments, info = self.model.transcribe(request.dst, beam_size=5, condition_on_previous_text=False)`
			`id = 0`
			`for segment in segments:`
			`print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))`
feat(api): Add transcribe response format request parameter & adjust STT backends (#8318) * WIP response format implementation for audio transcriptions (cherry picked from commit e271dd764bbc13846accf3beb8b6522153aa276f) Signed-off-by: Andres Smith <andressmithdev@pm.me> * Rework transcript response_format and add more formats (cherry picked from commit 6a93a8f63e2ee5726bca2980b0c9cf4ef8b7aeb8) Signed-off-by: Andres Smith <andressmithdev@pm.me> * Add test and replace go-openai package with official openai go client (cherry picked from commit f25d1a04e46526429c89db4c739e1e65942ca893) Signed-off-by: Andres Smith <andressmithdev@pm.me> * Fix faster-whisper backend and refactor transcription formatting to also work on CLI Signed-off-by: Andres Smith <andressmithdev@pm.me> (cherry picked from commit 69a93977d5e113eb7172bd85a0f918592d3d2168) Signed-off-by: Andres Smith <andressmithdev@pm.me> --------- Signed-off-by: Andres Smith <andressmithdev@pm.me> Co-authored-by: nanoandrew4 <nanoandrew4@gmail.com> Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com> 2026-02-01 16:33:17 +00:00			`resultSegments.append(backend_pb2.TranscriptSegment(id=id, start=int(segment.start)1e9, end=int(segment.end)1e9, text=segment.text))`
feat(faster-whisper): add backend (#4666) Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2025-01-23 07:06:18 +00:00			`text += segment.text`
feat(api): Add transcribe response format request parameter & adjust STT backends (#8318) * WIP response format implementation for audio transcriptions (cherry picked from commit e271dd764bbc13846accf3beb8b6522153aa276f) Signed-off-by: Andres Smith <andressmithdev@pm.me> * Rework transcript response_format and add more formats (cherry picked from commit 6a93a8f63e2ee5726bca2980b0c9cf4ef8b7aeb8) Signed-off-by: Andres Smith <andressmithdev@pm.me> * Add test and replace go-openai package with official openai go client (cherry picked from commit f25d1a04e46526429c89db4c739e1e65942ca893) Signed-off-by: Andres Smith <andressmithdev@pm.me> * Fix faster-whisper backend and refactor transcription formatting to also work on CLI Signed-off-by: Andres Smith <andressmithdev@pm.me> (cherry picked from commit 69a93977d5e113eb7172bd85a0f918592d3d2168) Signed-off-by: Andres Smith <andressmithdev@pm.me> --------- Signed-off-by: Andres Smith <andressmithdev@pm.me> Co-authored-by: nanoandrew4 <nanoandrew4@gmail.com> Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com> 2026-02-01 16:33:17 +00:00			`id += 1`
feat(faster-whisper): add backend (#4666) Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2025-01-23 07:06:18 +00:00			`except Exception as err:`
			`print(f"Unexpected {err=}, {type(err)=}", file=sys.stderr)`
feat(api): Add transcribe response format request parameter & adjust STT backends (#8318) * WIP response format implementation for audio transcriptions (cherry picked from commit e271dd764bbc13846accf3beb8b6522153aa276f) Signed-off-by: Andres Smith <andressmithdev@pm.me> * Rework transcript response_format and add more formats (cherry picked from commit 6a93a8f63e2ee5726bca2980b0c9cf4ef8b7aeb8) Signed-off-by: Andres Smith <andressmithdev@pm.me> * Add test and replace go-openai package with official openai go client (cherry picked from commit f25d1a04e46526429c89db4c739e1e65942ca893) Signed-off-by: Andres Smith <andressmithdev@pm.me> * Fix faster-whisper backend and refactor transcription formatting to also work on CLI Signed-off-by: Andres Smith <andressmithdev@pm.me> (cherry picked from commit 69a93977d5e113eb7172bd85a0f918592d3d2168) Signed-off-by: Andres Smith <andressmithdev@pm.me> --------- Signed-off-by: Andres Smith <andressmithdev@pm.me> Co-authored-by: nanoandrew4 <nanoandrew4@gmail.com> Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com> 2026-02-01 16:33:17 +00:00			`raise err`
feat(faster-whisper): add backend (#4666) Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2025-01-23 07:06:18 +00:00
			`return backend_pb2.TranscriptResult(segments=resultSegments, text=text)`

			`def serve(address):`
chore: bump grpc limits to 50MB (#5212) Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2025-04-19 06:53:24 +00:00			`server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),`
			`options=[`
			`('grpc.max_message_length', 50 * 1024 * 1024), # 50MB`
			`('grpc.max_send_message_length', 50 * 1024 * 1024), # 50MB`
			`('grpc.max_receive_message_length', 50 * 1024 * 1024), # 50MB`
feat: add distributed mode (#9124) * feat: add distributed mode (experimental) Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix data races, mutexes, transactions Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix events and tool stream in agent chat Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * use ginkgo Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(cron): compute correctly time boundaries avoiding re-triggering Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * enhancements, refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * do not flood of healthy checks Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * do not list obvious backends as text backends Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * tests fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Drop redundant healthcheck Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * enhancements, refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 
2026-03-29 22:47:27 +00:00			`],`
			`interceptors=get_auth_interceptors(),`
			`)`
feat(faster-whisper): add backend (#4666) Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2025-01-23 07:06:18 +00:00			`backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)`
			`server.add_insecure_port(address)`
			`server.start()`
			`print("Server started. Listening on: " + address, file=sys.stderr)`

			`# Define the signal handler function`
			`def signal_handler(sig, frame):`
			`print("Received termination signal. Shutting down...")`
			`server.stop(0)`
			`sys.exit(0)`

			`# Set the signal handlers for SIGINT and SIGTERM`
			`signal.signal(signal.SIGINT, signal_handler)`
			`signal.signal(signal.SIGTERM, signal_handler)`

			`try:`
			`while True:`
			`time.sleep(_ONE_DAY_IN_SECONDS)`
			`except KeyboardInterrupt:`
			`server.stop(0)`

			`if __name__ == "__main__":`
			`parser = argparse.ArgumentParser(description="Run the gRPC server.")`
			`parser.add_argument(`
			`"--addr", default="localhost:50051", help="The address to bind the server to."`
			`)`
			`args = parser.parse_args()`

			`serve(args.addr)`