package main import ( "fmt" "io" "log" "os" "os/exec" "sync" "syscall" "time" ) // worker tracks the state of a single express worker process type worker struct { port int addr string cmd *exec.Cmd stopping bool mu sync.Mutex } func runExpress(changes <-chan FileChange, numProcesses int, basePort int, pool *WorkerPool) { var currentWorkers []*worker var mu sync.Mutex // Helper to start a worker and monitor it for crashes startWorker := func(port int) *worker { w := &worker{ port: port, addr: fmt.Sprintf("127.0.0.1:%d", port), } // start spawns the worker process. Caller must NOT hold w.mu. start := func() bool { cmd := exec.Command("../backend/run.sh", "--listen", w.addr) cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr if err := cmd.Start(); err != nil { log.Printf("[express:%d] Failed to start: %v", port, err) return false } w.mu.Lock() w.cmd = cmd w.mu.Unlock() log.Printf("[express:%d] Started (pid %d)", port, cmd.Process.Pid) return true } if !start() { return nil } // Monitor and restart on crash go func() { for { w.mu.Lock() currentCmd := w.cmd stopping := w.stopping w.mu.Unlock() if stopping { return } if currentCmd == nil { time.Sleep(time.Second) w.mu.Lock() if w.stopping { w.mu.Unlock() return } w.mu.Unlock() if start() { pool.AddWorker(w.addr) } continue } err := currentCmd.Wait() w.mu.Lock() if w.stopping { w.mu.Unlock() return } w.cmd = nil w.mu.Unlock() // Worker crashed - remove from pool and restart pool.RemoveWorker(w.addr) if err != nil { log.Printf("[express:%d] Process crashed: %v, restarting...", w.port, err) } else { log.Printf("[express:%d] Process exited unexpectedly, restarting...", w.port) } time.Sleep(time.Second) w.mu.Lock() if w.stopping { w.mu.Unlock() return } w.mu.Unlock() if start() { pool.AddWorker(w.addr) } // If start failed, cmd is still nil and next iteration will retry } }() return w } // Helper to stop a worker (for intentional shutdown) stopWorker := func(w *worker) { if w == nil { return } w.mu.Lock() w.stopping = true cmd := w.cmd w.mu.Unlock() if cmd == nil || cmd.Process == nil { return } pid := cmd.Process.Pid log.Printf("[express:%d] Stopping (pid %d)", w.port, pid) cmd.Process.Signal(syscall.SIGTERM) // Wait briefly for graceful shutdown done := make(chan struct{}) go func() { cmd.Wait() close(done) }() select { case <-done: log.Printf("[express:%d] Stopped gracefully (pid %d)", w.port, pid) case <-time.After(5 * time.Second): log.Printf("[express:%d] Force killing (pid %d)", w.port, pid) cmd.Process.Kill() } } // Helper to stop all workers stopAllWorkers := func(workers []*worker) { for _, w := range workers { stopWorker(w) } } // Helper to start all workers and update the worker pool startAllWorkers := func() []*worker { workers := make([]*worker, 0, numProcesses) addresses := make([]string, 0, numProcesses) for i := 0; i < numProcesses; i++ { port := basePort + i w := startWorker(port) if w != nil { workers = append(workers, w) addresses = append(addresses, w.addr) } } // Update the worker pool with new worker addresses pool.SetWorkers(addresses) return workers } // Helper to run the build runBuild := func() bool { log.Printf("[build] Starting ncc build...") cmd := exec.Command("../backend/build.sh") stdout, _ := cmd.StdoutPipe() stderr, _ := cmd.StderrPipe() if err := cmd.Start(); err != nil { log.Printf("[build] Failed to start: %v", err) return false } // Copy output go io.Copy(os.Stdout, stdout) go io.Copy(os.Stderr, stderr) err := cmd.Wait() if err != nil { log.Printf("[build] Failed: %v", err) return false } log.Printf("[build] Success") return true } // Debounce timer var debounceTimer *time.Timer const debounceDelay = 100 * time.Millisecond // Initial build and start log.Printf("[master] Initial build...") if runBuild() { currentWorkers = startAllWorkers() } else { log.Printf("[master] Initial build failed") } for change := range changes { log.Printf("[watch] %s: %s", change.Operation, change.Path) // Reset debounce timer if debounceTimer != nil { debounceTimer.Stop() } debounceTimer = time.AfterFunc(debounceDelay, func() { if !runBuild() { log.Printf("[master] Build failed, keeping current workers") return } mu.Lock() defer mu.Unlock() // Stop all old workers (this sets stopping=true to prevent auto-restart) stopAllWorkers(currentWorkers) // Start all new workers currentWorkers = startAllWorkers() }) } }