Switch from Swarm labels to Traefik file provider for routing
ci/woodpecker/push/woodpecker Pipeline was successful Details

`docker service update --label-add` was restarting the tunnel-server
container on every label change, breaking all active SSH tunnels.

Now the server writes one YAML config file per tunnel to
/root/traefik/dynamic/ on the Traefik host via SSH. Traefik's file
provider watches the directory and picks up changes without any
container restarts, so clients can reconnect reliably after a server
restart instead of triggering a restart loop.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Leopere 2026-02-09 15:06:21 -05:00
parent 2f8d35903c
commit 64347ce8a5
Signed by: colin
SSH Key Fingerprint: SHA256:nRPCQTeMFLdGytxRQmPVK9VXY3/ePKQ5lGRyJhT5DY8
2 changed files with 78 additions and 98 deletions

View File

@ -49,7 +49,7 @@ func main() {
portStart := envInt("PORT_RANGE_START", 10000) portStart := envInt("PORT_RANGE_START", 10000)
portEnd := envInt("PORT_RANGE_END", 10100) portEnd := envInt("PORT_RANGE_END", 10100)
// Swarm manager SSH config (for updating service labels). // Traefik host SSH config (for writing dynamic config files).
traefikHost := envRequired("TRAEFIK_SSH_HOST") traefikHost := envRequired("TRAEFIK_SSH_HOST")
traefikUser := envOr("TRAEFIK_SSH_USER", "root") traefikUser := envOr("TRAEFIK_SSH_USER", "root")
traefikKey := envRequired("TRAEFIK_SSH_KEY") traefikKey := envRequired("TRAEFIK_SSH_KEY")
@ -62,13 +62,13 @@ func main() {
if err != nil { if err != nil {
log.Fatalf("Failed to load Traefik SSH key: %v", err) log.Fatalf("Failed to load Traefik SSH key: %v", err)
} }
log.Printf("Loaded Swarm manager SSH key") log.Printf("Loaded Traefik host SSH key")
// Initialize port pool. // Initialize port pool.
pool := server.NewPortPool(portStart, portEnd) pool := server.NewPortPool(portStart, portEnd)
log.Printf("Port pool: %d-%d (%d ports)", portStart, portEnd, portEnd-portStart+1) log.Printf("Port pool: %d-%d (%d ports)", portStart, portEnd, portEnd-portStart+1)
// Initialize label manager (Swarm service update via SSH). // Initialize label manager (Traefik file provider via SSH).
labels, err := server.NewLabelManager( labels, err := server.NewLabelManager(
traefikHost, traefikUser, traefikSigner, traefikHost, traefikUser, traefikSigner,
serviceName, entrypoint, certResolver, serviceName, entrypoint, certResolver,

View File

@ -10,26 +10,30 @@ import (
"golang.org/x/crypto/ssh" "golang.org/x/crypto/ssh"
) )
// LabelManager manages Traefik routing labels on its own Swarm service // LabelManager manages Traefik dynamic configuration by writing YAML config
// by SSHing into the Swarm manager and running docker service update. // files to the Traefik host via SSH. This uses the Traefik file provider
// (--providers.file.directory + --providers.file.watch=true) and avoids
// docker service update which restarts the server container.
type LabelManager struct { type LabelManager struct {
mu sync.Mutex mu sync.Mutex
remoteHost string // Swarm manager, e.g. "ingress.nixc.us" remoteHost string // Traefik host, e.g. "ingress.nixc.us:65522"
remoteUser string // SSH user remoteUser string // SSH user
signer ssh.Signer signer ssh.Signer
serviceName string // Swarm service name, e.g. "better-argo-tunnels_tunnel-server" serviceName string // Swarm service name (used for backend URL)
entrypoint string // e.g. "websecure" entrypoint string // e.g. "websecure"
certResolver string // e.g. "letsencryptresolver" certResolver string // e.g. "letsencryptresolver"
labels map[string]bool // track which tunnel keys we've added configDir string // remote dir for Traefik dynamic configs
authLabels map[string]bool // track which tunnel keys have auth middleware configs map[string]bool
} }
// NewLabelManager creates a label manager that updates Swarm service labels via SSH. // NewLabelManager creates a manager that writes Traefik file-provider configs
// to the remote host via SSH.
func NewLabelManager( func NewLabelManager(
remoteHost, remoteUser string, remoteHost, remoteUser string,
signer ssh.Signer, signer ssh.Signer,
serviceName, entrypoint, certResolver string, serviceName, entrypoint, certResolver string,
) (*LabelManager, error) { ) (*LabelManager, error) {
configDir := "/root/traefik/dynamic"
lm := &LabelManager{ lm := &LabelManager{
remoteHost: remoteHost, remoteHost: remoteHost,
@ -38,132 +42,113 @@ func NewLabelManager(
serviceName: serviceName, serviceName: serviceName,
entrypoint: entrypoint, entrypoint: entrypoint,
certResolver: certResolver, certResolver: certResolver,
labels: make(map[string]bool), configDir: configDir,
authLabels: make(map[string]bool), configs: make(map[string]bool),
} }
// Verify we can reach the Swarm manager and the service exists. // Ensure the config directory exists.
cmd := fmt.Sprintf("docker service inspect --format '{{.Spec.Name}}' %s", serviceName) cmd := fmt.Sprintf("mkdir -p %s", configDir)
if err := lm.runRemote(cmd); err != nil { if err := lm.runRemote(cmd); err != nil {
log.Printf("WARN: could not verify service %s (may not exist yet): %v", serviceName, err) log.Printf("WARN: could not ensure config dir %s: %v", configDir, err)
} else {
log.Printf("Verified Swarm service: %s", serviceName)
} }
log.Printf("Label manager ready (host=%s, service=%s, ep=%s, resolver=%s)", log.Printf("Label manager ready (host=%s, service=%s, ep=%s, resolver=%s, dir=%s)",
remoteHost, serviceName, entrypoint, certResolver) remoteHost, serviceName, entrypoint, certResolver, configDir)
return lm, nil return lm, nil
} }
// Add adds Traefik routing labels to the Swarm service for a tunnel. // Add writes a Traefik dynamic config YAML to the remote host for a tunnel.
// If authUser and authPass are non-empty, a basicauth middleware is also added. // If authUser and authPass are non-empty, a basicauth middleware is included.
func (lm *LabelManager) Add(tunKey, domain string, port int, authUser, authPass string) error { func (lm *LabelManager) Add(tunKey, domain string, port int, authUser, authPass string) error {
lm.mu.Lock() lm.mu.Lock()
defer lm.mu.Unlock() defer lm.mu.Unlock()
routerName := fmt.Sprintf("tunnel-%s-router", tunKey) routerName := fmt.Sprintf("tunnel-%s-router", tunKey)
serviceName := fmt.Sprintf("tunnel-%s-service", tunKey) svcName := fmt.Sprintf("tunnel-%s-service", tunKey)
middlewareName := fmt.Sprintf("tunnel-%s-auth", tunKey)
// Build the label-add flags for docker service update. // Build optional middleware section.
labelArgs := []string{ var middlewareYAML string
labelFlag(fmt.Sprintf("traefik.http.routers.%s.rule", routerName), var routerMiddleware string
fmt.Sprintf("Host(`%s`)", domain)),
labelFlag(fmt.Sprintf("traefik.http.routers.%s.entrypoints", routerName),
lm.entrypoint),
labelFlag(fmt.Sprintf("traefik.http.routers.%s.tls", routerName),
"true"),
labelFlag(fmt.Sprintf("traefik.http.routers.%s.tls.certresolver", routerName),
lm.certResolver),
labelFlag(fmt.Sprintf("traefik.http.routers.%s.service", routerName),
serviceName),
labelFlag(fmt.Sprintf("traefik.http.services.%s.loadbalancer.server.port", serviceName),
fmt.Sprintf("%d", port)),
}
// If auth credentials are provided, add basicauth middleware labels.
if authUser != "" && authPass != "" { if authUser != "" && authPass != "" {
middlewareName := fmt.Sprintf("tunnel-%s-auth", tunKey)
htpasswd, err := generateHTPasswd(authUser, authPass) htpasswd, err := generateHTPasswd(authUser, authPass)
if err != nil { if err != nil {
return fmt.Errorf("generate htpasswd for %s: %w", domain, err) return fmt.Errorf("generate htpasswd for %s: %w", domain, err)
} }
labelArgs = append(labelArgs, middlewareYAML = fmt.Sprintf(
labelFlag( " middlewares:\n %s:\n basicAuth:\n users:\n - %q\n",
fmt.Sprintf("traefik.http.middlewares.%s.basicauth.users", middlewareName), middlewareName, htpasswd)
htpasswd, routerMiddleware = fmt.Sprintf(
), "\n middlewares:\n - %s", middlewareName)
labelFlag( log.Printf("BasicAuth middleware %s configured for %s", middlewareName, domain)
fmt.Sprintf("traefik.http.routers.%s.middlewares", routerName),
middlewareName,
),
)
lm.authLabels[tunKey] = true
log.Printf("BasicAuth middleware %s added for %s", middlewareName, domain)
} }
cmd := fmt.Sprintf("docker service update --label-add %s %s", yaml := fmt.Sprintf(
strings.Join(labelArgs, " --label-add "), lm.serviceName) "http:\n"+
" routers:\n"+
" %s:\n"+
" rule: \"Host(`%s`)\"\n"+
" entryPoints:\n"+
" - %s\n"+
" tls:\n"+
" certResolver: %s\n"+
" service: %s%s\n"+
" services:\n"+
" %s:\n"+
" loadBalancer:\n"+
" servers:\n"+
" - url: \"http://%s:%d\"\n"+
"%s",
routerName,
domain,
lm.entrypoint,
lm.certResolver,
svcName,
routerMiddleware,
svcName,
lm.serviceName,
port,
middlewareYAML,
)
filePath := fmt.Sprintf("%s/tunnel-%s.yml", lm.configDir, tunKey)
// Write via heredoc so we don't need to escape anything.
cmd := fmt.Sprintf("cat > '%s' << 'TRAEFIKEOF'\n%sTRAEFIKEOF", filePath, yaml)
if err := lm.runRemote(cmd); err != nil { if err := lm.runRemote(cmd); err != nil {
return fmt.Errorf("add labels for %s: %w", domain, err) return fmt.Errorf("write config for %s: %w", domain, err)
} }
lm.labels[tunKey] = true lm.configs[tunKey] = true
log.Printf("Added Swarm labels: %s -> %s:%d", domain, lm.serviceName, port) log.Printf("Wrote Traefik config: %s -> %s:%d (%s)", domain, lm.serviceName, port, filePath)
return nil return nil
} }
// Remove removes Traefik routing labels from the Swarm service for a tunnel. // Remove deletes the Traefik dynamic config file for a tunnel.
func (lm *LabelManager) Remove(tunKey string) error { func (lm *LabelManager) Remove(tunKey string) error {
lm.mu.Lock() lm.mu.Lock()
defer lm.mu.Unlock() defer lm.mu.Unlock()
if !lm.labels[tunKey] { if !lm.configs[tunKey] {
return nil // nothing to remove return nil
} }
routerName := fmt.Sprintf("tunnel-%s-router", tunKey) filePath := fmt.Sprintf("%s/tunnel-%s.yml", lm.configDir, tunKey)
serviceName := fmt.Sprintf("tunnel-%s-service", tunKey) cmd := fmt.Sprintf("rm -f '%s'", filePath)
middlewareName := fmt.Sprintf("tunnel-%s-auth", tunKey)
// Build the label-rm flags.
rmLabels := []string{
fmt.Sprintf("traefik.http.routers.%s.rule", routerName),
fmt.Sprintf("traefik.http.routers.%s.entrypoints", routerName),
fmt.Sprintf("traefik.http.routers.%s.tls", routerName),
fmt.Sprintf("traefik.http.routers.%s.tls.certresolver", routerName),
fmt.Sprintf("traefik.http.routers.%s.service", routerName),
fmt.Sprintf("traefik.http.services.%s.loadbalancer.server.port", serviceName),
}
// Remove auth middleware labels if they were added.
if lm.authLabels[tunKey] {
rmLabels = append(rmLabels,
fmt.Sprintf("traefik.http.middlewares.%s.basicauth.users", middlewareName),
fmt.Sprintf("traefik.http.routers.%s.middlewares", routerName),
)
delete(lm.authLabels, tunKey)
log.Printf("Removing BasicAuth middleware %s", middlewareName)
}
cmd := fmt.Sprintf("docker service update --label-rm %s %s",
strings.Join(rmLabels, " --label-rm "), lm.serviceName)
if err := lm.runRemote(cmd); err != nil { if err := lm.runRemote(cmd); err != nil {
return fmt.Errorf("remove labels for %s: %w", tunKey, err) return fmt.Errorf("remove config for %s: %w", tunKey, err)
} }
delete(lm.labels, tunKey) delete(lm.configs, tunKey)
log.Printf("Removed Swarm labels for tunnel: %s", tunKey) log.Printf("Removed Traefik config: %s", filePath)
return nil return nil
} }
// generateHTPasswd creates a bcrypt-hashed htpasswd entry for Traefik basicauth. // generateHTPasswd creates a bcrypt-hashed htpasswd entry for Traefik basicauth.
// The output format is user:$hash. Dollar signs are NOT doubled here because
// we pass labels via docker service update with single-quoted values, which
// preserves them literally. Doubling is only needed in compose files.
func generateHTPasswd(user, pass string) (string, error) { func generateHTPasswd(user, pass string) (string, error) {
hash, err := bcrypt.GenerateFromPassword([]byte(pass), bcrypt.DefaultCost) hash, err := bcrypt.GenerateFromPassword([]byte(pass), bcrypt.DefaultCost)
if err != nil { if err != nil {
@ -172,12 +157,7 @@ func generateHTPasswd(user, pass string) (string, error) {
return fmt.Sprintf("%s:%s", user, string(hash)), nil return fmt.Sprintf("%s:%s", user, string(hash)), nil
} }
// labelFlag formats a --label-add value, quoting properly for shell. // runRemote executes a command on the Traefik host via SSH.
func labelFlag(key, value string) string {
return fmt.Sprintf("'%s=%s'", key, value)
}
// runRemote executes a command on the Swarm manager via SSH.
func (lm *LabelManager) runRemote(cmd string) error { func (lm *LabelManager) runRemote(cmd string) error {
addr := lm.remoteHost addr := lm.remoteHost
if !strings.Contains(addr, ":") { if !strings.Contains(addr, ":") {