//go:build cgo
// +build cgo

package audio

import (
	"context"
	"fmt"
	"os"
	"os/exec"
	"sync"
	"sync/atomic"
	"syscall"
	"time"

	"github.com/jetkvm/kvm/internal/logging"
	"github.com/rs/zerolog"
)

// Restart configuration is now retrieved from centralized config
func getMaxRestartAttempts() int {
	return GetConfig().MaxRestartAttempts
}

func getRestartWindow() time.Duration {
	return GetConfig().RestartWindow
}

func getRestartDelay() time.Duration {
	return GetConfig().RestartDelay
}

func getMaxRestartDelay() time.Duration {
	return GetConfig().MaxRestartDelay
}

// AudioOutputSupervisor manages the audio output server subprocess lifecycle
type AudioOutputSupervisor struct {
	ctx     context.Context
	cancel  context.CancelFunc
	logger  *zerolog.Logger
	mutex   sync.RWMutex
	running int32

	// Process management
	cmd        *exec.Cmd
	processPID int

	// Restart management
	restartAttempts []time.Time
	lastExitCode    int
	lastExitTime    time.Time

	// Channels for coordination
	processDone chan struct{}
	stopChan    chan struct{}

	// Process monitoring
	processMonitor *ProcessMonitor

	// Callbacks
	onProcessStart func(pid int)
	onProcessExit  func(pid int, exitCode int, crashed bool)
	onRestart      func(attempt int, delay time.Duration)
}

// NewAudioOutputSupervisor creates a new audio output server supervisor
func NewAudioOutputSupervisor() *AudioOutputSupervisor {
	ctx, cancel := context.WithCancel(context.Background())
	logger := logging.GetDefaultLogger().With().Str("component", "audio-supervisor").Logger()

	return &AudioOutputSupervisor{
		ctx:            ctx,
		cancel:         cancel,
		logger:         &logger,
		processDone:    make(chan struct{}),
		stopChan:       make(chan struct{}),
		processMonitor: GetProcessMonitor(),
	}
}

// SetCallbacks sets optional callbacks for process lifecycle events
func (s *AudioOutputSupervisor) SetCallbacks(
	onStart func(pid int),
	onExit func(pid int, exitCode int, crashed bool),
	onRestart func(attempt int, delay time.Duration),
) {
	s.mutex.Lock()
	defer s.mutex.Unlock()

	s.onProcessStart = onStart
	s.onProcessExit = onExit
	s.onRestart = onRestart
}
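
// Illustrative wiring (a hypothetical caller, not part of this file): create
// a supervisor, register callbacks, then start it and stop it on shutdown.
//
//	supervisor := NewAudioOutputSupervisor()
//	supervisor.SetCallbacks(
//		func(pid int) { log.Printf("audio output server started, pid=%d", pid) },
//		func(pid, exitCode int, crashed bool) { log.Printf("exited: code=%d crashed=%v", exitCode, crashed) },
//		func(attempt int, delay time.Duration) { log.Printf("restart #%d in %s", attempt, delay) },
//	)
//	if err := supervisor.Start(); err != nil {
//		log.Fatal(err)
//	}
//	defer supervisor.Stop()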

// Start begins supervising the audio output server process
func (s *AudioOutputSupervisor) Start() error {
	if !atomic.CompareAndSwapInt32(&s.running, 0, 1) {
		return fmt.Errorf("supervisor already running")
	}

	s.logger.Info().Msg("starting audio server supervisor")

	// Recreate channels in case they were closed by a previous Stop() call
	s.mutex.Lock()
	s.processDone = make(chan struct{})
	s.stopChan = make(chan struct{})
	// Recreate context as well since it might have been cancelled
	s.ctx, s.cancel = context.WithCancel(context.Background())
	// Reset restart tracking on start
	s.restartAttempts = s.restartAttempts[:0]
	s.lastExitCode = 0
	s.lastExitTime = time.Time{}
	s.mutex.Unlock()

	// Start the supervision loop
	go s.supervisionLoop()

	return nil
}

// Stop gracefully stops the audio server and supervisor
func (s *AudioOutputSupervisor) Stop() error {
	if !atomic.CompareAndSwapInt32(&s.running, 1, 0) {
		return nil // Already stopped
	}

	s.logger.Info().Msg("stopping audio server supervisor")

	// Signal stop and wait for cleanup
	close(s.stopChan)
	s.cancel()

	// Wait for process to exit
	select {
	case <-s.processDone:
		s.logger.Info().Msg("audio server process stopped gracefully")
	case <-time.After(GetConfig().SupervisorTimeout):
		s.logger.Warn().Msg("audio server process did not stop gracefully, forcing termination")
		s.forceKillProcess()
	}

	return nil
}
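
// Note: Start recreates stopChan, processDone, and the context, so a single
// supervisor value can be stopped and started again. A hypothetical restart
// sequence (caller code, not part of this file) is simply:
//
//	_ = supervisor.Stop()
//	if err := supervisor.Start(); err != nil {
//		log.Fatal(err)
//	}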

// IsRunning returns true if the supervisor is running
func (s *AudioOutputSupervisor) IsRunning() bool {
	return atomic.LoadInt32(&s.running) == 1
}

// GetProcessPID returns the current process PID (0 if not running)
func (s *AudioOutputSupervisor) GetProcessPID() int {
	s.mutex.RLock()
	defer s.mutex.RUnlock()
	return s.processPID
}

// GetLastExitInfo returns information about the last process exit
func (s *AudioOutputSupervisor) GetLastExitInfo() (exitCode int, exitTime time.Time) {
	s.mutex.RLock()
	defer s.mutex.RUnlock()
	return s.lastExitCode, s.lastExitTime
}

// GetProcessMetrics returns current process metrics if the process is running
func (s *AudioOutputSupervisor) GetProcessMetrics() *ProcessMetrics {
	s.mutex.RLock()
	pid := s.processPID
	s.mutex.RUnlock()

	if pid == 0 {
		return nil
	}

	metrics := s.processMonitor.GetCurrentMetrics()
	for _, metric := range metrics {
		if metric.PID == pid {
			return &metric
		}
	}
	return nil
}
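
// Illustrative polling sketch (hypothetical caller): periodically sample the
// supervised process, skipping samples while no subprocess is alive.
//
//	ticker := time.NewTicker(5 * time.Second)
//	defer ticker.Stop()
//	for range ticker.C {
//		if m := supervisor.GetProcessMetrics(); m != nil {
//			log.Printf("audio output server pid=%d metrics=%+v", supervisor.GetProcessPID(), m)
//		}
//	}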

// supervisionLoop is the main supervision loop
func (s *AudioOutputSupervisor) supervisionLoop() {
	defer func() {
		close(s.processDone)
		s.logger.Info().Msg("audio server supervision ended")
	}()

	for atomic.LoadInt32(&s.running) == 1 {
		select {
		case <-s.stopChan:
			s.logger.Info().Msg("received stop signal")
			s.terminateProcess()
			return
		case <-s.ctx.Done():
			s.logger.Info().Msg("context cancelled")
			s.terminateProcess()
			return
		default:
			// Start or restart the process
			if err := s.startProcess(); err != nil {
				s.logger.Error().Err(err).Msg("failed to start audio server process")

				// Check if we should attempt restart
				if !s.shouldRestart() {
					s.logger.Error().Msg("maximum restart attempts exceeded, stopping supervisor")
					return
				}

				delay := s.calculateRestartDelay()
				s.logger.Warn().Dur("delay", delay).Msg("retrying process start after delay")

				if s.onRestart != nil {
					s.onRestart(len(s.restartAttempts), delay)
				}

				select {
				case <-time.After(delay):
				case <-s.stopChan:
					return
				case <-s.ctx.Done():
					return
				}
				continue
			}

			// Wait for process to exit
			s.waitForProcessExit()

			// Check if we should restart
			if !s.shouldRestart() {
				s.logger.Error().Msg("maximum restart attempts exceeded, stopping supervisor")
				return
			}

			// Calculate restart delay
			delay := s.calculateRestartDelay()
			s.logger.Info().Dur("delay", delay).Msg("restarting audio server process after delay")

			if s.onRestart != nil {
				s.onRestart(len(s.restartAttempts), delay)
			}

			// Wait for restart delay
			select {
			case <-time.After(delay):
			case <-s.stopChan:
				return
			case <-s.ctx.Done():
				return
			}
		}
	}
}
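
// The supervised child is this same executable re-invoked with the
// "--audio-output-server" flag (see startProcess below). A hypothetical
// dispatch on the child side, for illustration only (the real entry point
// is not shown in this file):
//
//	for _, arg := range os.Args[1:] {
//		if arg == "--audio-output-server" {
//			runAudioOutputServer() // hypothetical entry point
//			return
//		}
//	}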

// startProcess starts the audio server process
func (s *AudioOutputSupervisor) startProcess() error {
	execPath, err := os.Executable()
	if err != nil {
		return fmt.Errorf("failed to get executable path: %w", err)
	}

	s.mutex.Lock()
	defer s.mutex.Unlock()

	// Create new command; CommandContext kills the child if s.ctx is cancelled
	s.cmd = exec.CommandContext(s.ctx, execPath, "--audio-output-server")
	s.cmd.Stdout = os.Stdout
	s.cmd.Stderr = os.Stderr

	// Start the process
	if err := s.cmd.Start(); err != nil {
		return fmt.Errorf("failed to start audio output server process: %w", err)
	}

	s.processPID = s.cmd.Process.Pid
	s.logger.Info().Int("pid", s.processPID).Msg("audio server process started")

	// Add process to monitoring
	s.processMonitor.AddProcess(s.processPID, "audio-output-server")

	if s.onProcessStart != nil {
		s.onProcessStart(s.processPID)
	}

	return nil
}

// waitForProcessExit waits for the current process to exit and logs the result
func (s *AudioOutputSupervisor) waitForProcessExit() {
	s.mutex.RLock()
	cmd := s.cmd
	pid := s.processPID
	s.mutex.RUnlock()

	if cmd == nil {
		return
	}

	// Wait for process to exit
	err := cmd.Wait()

	s.mutex.Lock()
	s.lastExitTime = time.Now()
	s.processPID = 0

	var exitCode int
	var crashed bool

	if err != nil {
		if exitError, ok := err.(*exec.ExitError); ok {
			exitCode = exitError.ExitCode()
			crashed = exitCode != 0
		} else {
			// Process was killed or other error
			exitCode = -1
			crashed = true
		}
	} else {
		exitCode = 0
		crashed = false
	}

	s.lastExitCode = exitCode
	s.mutex.Unlock()

	// Remove process from monitoring
	s.processMonitor.RemoveProcess(pid)

	if crashed {
		s.logger.Error().Int("pid", pid).Int("exit_code", exitCode).Msg("audio server process crashed")
		s.recordRestartAttempt()
	} else {
		s.logger.Info().Int("pid", pid).Msg("audio server process exited gracefully")
	}

	if s.onProcessExit != nil {
		s.onProcessExit(pid, exitCode, crashed)
	}
}

// terminateProcess gracefully terminates the current process
func (s *AudioOutputSupervisor) terminateProcess() {
	s.mutex.RLock()
	cmd := s.cmd
	pid := s.processPID
	s.mutex.RUnlock()

	if cmd == nil || cmd.Process == nil {
		return
	}

	s.logger.Info().Int("pid", pid).Msg("terminating audio server process")

	// Send SIGTERM first
	if err := cmd.Process.Signal(syscall.SIGTERM); err != nil {
		s.logger.Warn().Err(err).Int("pid", pid).Msg("failed to send SIGTERM")
	}

	// Wait for graceful shutdown
	done := make(chan struct{})
	go func() {
		_ = cmd.Wait()
		close(done)
	}()

	select {
	case <-done:
		s.logger.Info().Int("pid", pid).Msg("audio server process terminated gracefully")
	case <-time.After(GetConfig().OutputSupervisorTimeout):
		s.logger.Warn().Int("pid", pid).Msg("process did not terminate gracefully, sending SIGKILL")
		s.forceKillProcess()
	}
}

// forceKillProcess forcefully kills the current process
func (s *AudioOutputSupervisor) forceKillProcess() {
	s.mutex.RLock()
	cmd := s.cmd
	pid := s.processPID
	s.mutex.RUnlock()

	if cmd == nil || cmd.Process == nil {
		return
	}

	s.logger.Warn().Int("pid", pid).Msg("force killing audio server process")
	if err := cmd.Process.Kill(); err != nil {
		s.logger.Error().Err(err).Int("pid", pid).Msg("failed to kill process")
	}
}

// shouldRestart determines if the process should be restarted
func (s *AudioOutputSupervisor) shouldRestart() bool {
	if atomic.LoadInt32(&s.running) == 0 {
		return false // Supervisor is stopping
	}

	// Take the write lock: the pruning below mutates s.restartAttempts
	s.mutex.Lock()
	defer s.mutex.Unlock()

	// Clean up old restart attempts outside the window
	now := time.Now()
	var recentAttempts []time.Time
	for _, attempt := range s.restartAttempts {
		if now.Sub(attempt) < getRestartWindow() {
			recentAttempts = append(recentAttempts, attempt)
		}
	}
	s.restartAttempts = recentAttempts

	return len(s.restartAttempts) < getMaxRestartAttempts()
}
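
// For illustration (assuming MaxRestartAttempts = 5 and RestartWindow = 5m,
// which are placeholder values, not the actual config defaults): five crashes
// within five minutes stop the supervisor, but a later crash after the oldest
// attempt has aged out of the sliding window is still allowed to restart.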

// recordRestartAttempt records a restart attempt
func (s *AudioOutputSupervisor) recordRestartAttempt() {
	s.mutex.Lock()
	defer s.mutex.Unlock()

	s.restartAttempts = append(s.restartAttempts, time.Now())
}

// calculateRestartDelay calculates the delay before the next restart attempt
func (s *AudioOutputSupervisor) calculateRestartDelay() time.Duration {
	s.mutex.RLock()
	defer s.mutex.RUnlock()

	// Exponential backoff based on recent restart attempts
	attempts := len(s.restartAttempts)
	if attempts == 0 {
		return getRestartDelay()
	}

	// Calculate exponential backoff: 2^attempts * base delay
	delay := getRestartDelay()
	for i := 0; i < attempts && delay < getMaxRestartDelay(); i++ {
		delay *= 2
	}

	if delay > getMaxRestartDelay() {
		delay = getMaxRestartDelay()
	}

	return delay
}
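
// Worked example (assuming RestartDelay = 1s and MaxRestartDelay = 30s, which
// are placeholder values, not the actual config defaults): 1 recent attempt
// yields 2s, 2 yield 4s, 3 yield 8s, 4 yield 16s, and 5 yield 32s, which the
// cap clamps to 30s.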