kvm/internal/audio/supervisor.go
Alex P e4ed2b8fad refactor(audio): rename audio components for clarity and add validation
Rename audio server/client components to be more specific (AudioOutputServer/Client). Add new validation.go and ipc_common.go files for shared IPC functionality. Improve error handling and cleanup in input/output IPC components.

Disable granular metrics logging to reduce log pollution. Reset metrics on failed start and ensure proper cleanup. Add common IPC message interface and optimized message pool for reuse.
2025-08-26 10:42:25 +00:00

448 lines
11 KiB
Go

//go:build cgo
// +build cgo
package audio
import (
"context"
"fmt"
"os"
"os/exec"
"sync"
"sync/atomic"
"syscall"
"time"
"github.com/jetkvm/kvm/internal/logging"
"github.com/rs/zerolog"
)
// Restart configuration is now retrieved from centralized config
func getMaxRestartAttempts() int {
return GetConfig().MaxRestartAttempts
}
func getRestartWindow() time.Duration {
return GetConfig().RestartWindow
}
func getRestartDelay() time.Duration {
return GetConfig().RestartDelay
}
func getMaxRestartDelay() time.Duration {
return GetConfig().MaxRestartDelay
}
// AudioOutputSupervisor manages the audio output server subprocess lifecycle
type AudioOutputSupervisor struct {
ctx context.Context
cancel context.CancelFunc
logger *zerolog.Logger
mutex sync.RWMutex
running int32
// Process management
cmd *exec.Cmd
processPID int
// Restart management
restartAttempts []time.Time
lastExitCode int
lastExitTime time.Time
// Channels for coordination
processDone chan struct{}
stopChan chan struct{}
// Process monitoring
processMonitor *ProcessMonitor
// Callbacks
onProcessStart func(pid int)
onProcessExit func(pid int, exitCode int, crashed bool)
onRestart func(attempt int, delay time.Duration)
}
// NewAudioOutputSupervisor creates a new audio output server supervisor
func NewAudioOutputSupervisor() *AudioOutputSupervisor {
ctx, cancel := context.WithCancel(context.Background())
logger := logging.GetDefaultLogger().With().Str("component", "audio-supervisor").Logger()
return &AudioOutputSupervisor{
ctx: ctx,
cancel: cancel,
logger: &logger,
processDone: make(chan struct{}),
stopChan: make(chan struct{}),
processMonitor: GetProcessMonitor(),
}
}
// SetCallbacks sets optional callbacks for process lifecycle events
func (s *AudioOutputSupervisor) SetCallbacks(
onStart func(pid int),
onExit func(pid int, exitCode int, crashed bool),
onRestart func(attempt int, delay time.Duration),
) {
s.mutex.Lock()
defer s.mutex.Unlock()
s.onProcessStart = onStart
s.onProcessExit = onExit
s.onRestart = onRestart
}
// Start begins supervising the audio output server process
func (s *AudioOutputSupervisor) Start() error {
if !atomic.CompareAndSwapInt32(&s.running, 0, 1) {
return fmt.Errorf("supervisor already running")
}
s.logger.Info().Msg("starting audio server supervisor")
// Recreate channels in case they were closed by a previous Stop() call
s.mutex.Lock()
s.processDone = make(chan struct{})
s.stopChan = make(chan struct{})
// Recreate context as well since it might have been cancelled
s.ctx, s.cancel = context.WithCancel(context.Background())
// Reset restart tracking on start
s.restartAttempts = s.restartAttempts[:0]
s.lastExitCode = 0
s.lastExitTime = time.Time{}
s.mutex.Unlock()
// Start the supervision loop
go s.supervisionLoop()
return nil
}
// Stop gracefully stops the audio server and supervisor
func (s *AudioOutputSupervisor) Stop() error {
if !atomic.CompareAndSwapInt32(&s.running, 1, 0) {
return nil // Already stopped
}
s.logger.Info().Msg("stopping audio server supervisor")
// Signal stop and wait for cleanup
close(s.stopChan)
s.cancel()
// Wait for process to exit
select {
case <-s.processDone:
s.logger.Info().Msg("audio server process stopped gracefully")
case <-time.After(GetConfig().SupervisorTimeout):
s.logger.Warn().Msg("audio server process did not stop gracefully, forcing termination")
s.forceKillProcess()
}
return nil
}
// IsRunning returns true if the supervisor is running
func (s *AudioOutputSupervisor) IsRunning() bool {
return atomic.LoadInt32(&s.running) == 1
}
// GetProcessPID returns the current process PID (0 if not running)
func (s *AudioOutputSupervisor) GetProcessPID() int {
s.mutex.RLock()
defer s.mutex.RUnlock()
return s.processPID
}
// GetLastExitInfo returns information about the last process exit
func (s *AudioOutputSupervisor) GetLastExitInfo() (exitCode int, exitTime time.Time) {
s.mutex.RLock()
defer s.mutex.RUnlock()
return s.lastExitCode, s.lastExitTime
}
// GetProcessMetrics returns current process metrics if the process is running
func (s *AudioOutputSupervisor) GetProcessMetrics() *ProcessMetrics {
s.mutex.RLock()
pid := s.processPID
s.mutex.RUnlock()
if pid == 0 {
return nil
}
metrics := s.processMonitor.GetCurrentMetrics()
for _, metric := range metrics {
if metric.PID == pid {
return &metric
}
}
return nil
}
// supervisionLoop is the main supervision loop
func (s *AudioOutputSupervisor) supervisionLoop() {
defer func() {
close(s.processDone)
s.logger.Info().Msg("audio server supervision ended")
}()
for atomic.LoadInt32(&s.running) == 1 {
select {
case <-s.stopChan:
s.logger.Info().Msg("received stop signal")
s.terminateProcess()
return
case <-s.ctx.Done():
s.logger.Info().Msg("context cancelled")
s.terminateProcess()
return
default:
// Start or restart the process
if err := s.startProcess(); err != nil {
s.logger.Error().Err(err).Msg("failed to start audio server process")
// Check if we should attempt restart
if !s.shouldRestart() {
s.logger.Error().Msg("maximum restart attempts exceeded, stopping supervisor")
return
}
delay := s.calculateRestartDelay()
s.logger.Warn().Dur("delay", delay).Msg("retrying process start after delay")
if s.onRestart != nil {
s.onRestart(len(s.restartAttempts), delay)
}
select {
case <-time.After(delay):
case <-s.stopChan:
return
case <-s.ctx.Done():
return
}
continue
}
// Wait for process to exit
s.waitForProcessExit()
// Check if we should restart
if !s.shouldRestart() {
s.logger.Error().Msg("maximum restart attempts exceeded, stopping supervisor")
return
}
// Calculate restart delay
delay := s.calculateRestartDelay()
s.logger.Info().Dur("delay", delay).Msg("restarting audio server process after delay")
if s.onRestart != nil {
s.onRestart(len(s.restartAttempts), delay)
}
// Wait for restart delay
select {
case <-time.After(delay):
case <-s.stopChan:
return
case <-s.ctx.Done():
return
}
}
}
}
// startProcess starts the audio server process
func (s *AudioOutputSupervisor) startProcess() error {
execPath, err := os.Executable()
if err != nil {
return fmt.Errorf("failed to get executable path: %w", err)
}
s.mutex.Lock()
defer s.mutex.Unlock()
// Create new command
s.cmd = exec.CommandContext(s.ctx, execPath, "--audio-output-server")
s.cmd.Stdout = os.Stdout
s.cmd.Stderr = os.Stderr
// Start the process
if err := s.cmd.Start(); err != nil {
return fmt.Errorf("failed to start audio output server process: %w", err)
}
s.processPID = s.cmd.Process.Pid
s.logger.Info().Int("pid", s.processPID).Msg("audio server process started")
// Add process to monitoring
s.processMonitor.AddProcess(s.processPID, "audio-output-server")
if s.onProcessStart != nil {
s.onProcessStart(s.processPID)
}
return nil
}
// waitForProcessExit waits for the current process to exit and logs the result
func (s *AudioOutputSupervisor) waitForProcessExit() {
s.mutex.RLock()
cmd := s.cmd
pid := s.processPID
s.mutex.RUnlock()
if cmd == nil {
return
}
// Wait for process to exit
err := cmd.Wait()
s.mutex.Lock()
s.lastExitTime = time.Now()
s.processPID = 0
var exitCode int
var crashed bool
if err != nil {
if exitError, ok := err.(*exec.ExitError); ok {
exitCode = exitError.ExitCode()
crashed = exitCode != 0
} else {
// Process was killed or other error
exitCode = -1
crashed = true
}
} else {
exitCode = 0
crashed = false
}
s.lastExitCode = exitCode
s.mutex.Unlock()
// Remove process from monitoring
s.processMonitor.RemoveProcess(pid)
if crashed {
s.logger.Error().Int("pid", pid).Int("exit_code", exitCode).Msg("audio server process crashed")
s.recordRestartAttempt()
} else {
s.logger.Info().Int("pid", pid).Msg("audio server process exited gracefully")
}
if s.onProcessExit != nil {
s.onProcessExit(pid, exitCode, crashed)
}
}
// terminateProcess gracefully terminates the current process
func (s *AudioOutputSupervisor) terminateProcess() {
s.mutex.RLock()
cmd := s.cmd
pid := s.processPID
s.mutex.RUnlock()
if cmd == nil || cmd.Process == nil {
return
}
s.logger.Info().Int("pid", pid).Msg("terminating audio server process")
// Send SIGTERM first
if err := cmd.Process.Signal(syscall.SIGTERM); err != nil {
s.logger.Warn().Err(err).Int("pid", pid).Msg("failed to send SIGTERM")
}
// Wait for graceful shutdown
done := make(chan struct{})
go func() {
_ = cmd.Wait()
close(done)
}()
select {
case <-done:
s.logger.Info().Int("pid", pid).Msg("audio server process terminated gracefully")
case <-time.After(GetConfig().OutputSupervisorTimeout):
s.logger.Warn().Int("pid", pid).Msg("process did not terminate gracefully, sending SIGKILL")
s.forceKillProcess()
}
}
// forceKillProcess forcefully kills the current process
func (s *AudioOutputSupervisor) forceKillProcess() {
s.mutex.RLock()
cmd := s.cmd
pid := s.processPID
s.mutex.RUnlock()
if cmd == nil || cmd.Process == nil {
return
}
s.logger.Warn().Int("pid", pid).Msg("force killing audio server process")
if err := cmd.Process.Kill(); err != nil {
s.logger.Error().Err(err).Int("pid", pid).Msg("failed to kill process")
}
}
// shouldRestart determines if the process should be restarted
func (s *AudioOutputSupervisor) shouldRestart() bool {
if atomic.LoadInt32(&s.running) == 0 {
return false // Supervisor is stopping
}
s.mutex.RLock()
defer s.mutex.RUnlock()
// Clean up old restart attempts outside the window
now := time.Now()
var recentAttempts []time.Time
for _, attempt := range s.restartAttempts {
if now.Sub(attempt) < getRestartWindow() {
recentAttempts = append(recentAttempts, attempt)
}
}
s.restartAttempts = recentAttempts
return len(s.restartAttempts) < getMaxRestartAttempts()
}
// recordRestartAttempt records a restart attempt
func (s *AudioOutputSupervisor) recordRestartAttempt() {
s.mutex.Lock()
defer s.mutex.Unlock()
s.restartAttempts = append(s.restartAttempts, time.Now())
}
// calculateRestartDelay calculates the delay before next restart attempt
func (s *AudioOutputSupervisor) calculateRestartDelay() time.Duration {
s.mutex.RLock()
defer s.mutex.RUnlock()
// Exponential backoff based on recent restart attempts
attempts := len(s.restartAttempts)
if attempts == 0 {
return getRestartDelay()
}
// Calculate exponential backoff: 2^attempts * base delay
delay := getRestartDelay()
for i := 0; i < attempts && delay < getMaxRestartDelay(); i++ {
delay *= 2
}
if delay > getMaxRestartDelay() {
delay = getMaxRestartDelay()
}
return delay
}