Switch from Swarm labels to Traefik file provider for routing
ci/woodpecker/push/woodpecker Pipeline was successful
Details
ci/woodpecker/push/woodpecker Pipeline was successful
Details
docker service update --label-add was restarting the tunnel-server container on every label change, breaking all active SSH tunnels. Now the server writes YAML config files to /root/traefik/dynamic/ on the Traefik host via SSH. Traefik's file provider watches the directory and picks up changes without any container restarts. Clients can reconnect reliably after server restarts with no restart loops. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
parent
2f8d35903c
commit
64347ce8a5
|
|
@ -49,7 +49,7 @@ func main() {
|
|||
portStart := envInt("PORT_RANGE_START", 10000)
|
||||
portEnd := envInt("PORT_RANGE_END", 10100)
|
||||
|
||||
// Swarm manager SSH config (for updating service labels).
|
||||
// Traefik host SSH config (for writing dynamic config files).
|
||||
traefikHost := envRequired("TRAEFIK_SSH_HOST")
|
||||
traefikUser := envOr("TRAEFIK_SSH_USER", "root")
|
||||
traefikKey := envRequired("TRAEFIK_SSH_KEY")
|
||||
|
|
@ -62,13 +62,13 @@ func main() {
|
|||
if err != nil {
|
||||
log.Fatalf("Failed to load Traefik SSH key: %v", err)
|
||||
}
|
||||
log.Printf("Loaded Swarm manager SSH key")
|
||||
log.Printf("Loaded Traefik host SSH key")
|
||||
|
||||
// Initialize port pool.
|
||||
pool := server.NewPortPool(portStart, portEnd)
|
||||
log.Printf("Port pool: %d-%d (%d ports)", portStart, portEnd, portEnd-portStart+1)
|
||||
|
||||
// Initialize label manager (Swarm service update via SSH).
|
||||
// Initialize label manager (Traefik file provider via SSH).
|
||||
labels, err := server.NewLabelManager(
|
||||
traefikHost, traefikUser, traefikSigner,
|
||||
serviceName, entrypoint, certResolver,
|
||||
|
|
|
|||
|
|
@ -10,26 +10,30 @@ import (
|
|||
"golang.org/x/crypto/ssh"
|
||||
)
|
||||
|
||||
// LabelManager manages Traefik routing labels on its own Swarm service
|
||||
// by SSHing into the Swarm manager and running docker service update.
|
||||
// LabelManager manages Traefik dynamic configuration by writing YAML config
|
||||
// files to the Traefik host via SSH. This uses the Traefik file provider
|
||||
// (--providers.file.directory + --providers.file.watch=true) and avoids
|
||||
// docker service update which restarts the server container.
|
||||
type LabelManager struct {
|
||||
mu sync.Mutex
|
||||
remoteHost string // Swarm manager, e.g. "ingress.nixc.us"
|
||||
remoteHost string // Traefik host, e.g. "ingress.nixc.us:65522"
|
||||
remoteUser string // SSH user
|
||||
signer ssh.Signer
|
||||
serviceName string // Swarm service name, e.g. "better-argo-tunnels_tunnel-server"
|
||||
serviceName string // Swarm service name (used for backend URL)
|
||||
entrypoint string // e.g. "websecure"
|
||||
certResolver string // e.g. "letsencryptresolver"
|
||||
labels map[string]bool // track which tunnel keys we've added
|
||||
authLabels map[string]bool // track which tunnel keys have auth middleware
|
||||
configDir string // remote dir for Traefik dynamic configs
|
||||
configs map[string]bool
|
||||
}
|
||||
|
||||
// NewLabelManager creates a label manager that updates Swarm service labels via SSH.
|
||||
// NewLabelManager creates a manager that writes Traefik file-provider configs
|
||||
// to the remote host via SSH.
|
||||
func NewLabelManager(
|
||||
remoteHost, remoteUser string,
|
||||
signer ssh.Signer,
|
||||
serviceName, entrypoint, certResolver string,
|
||||
) (*LabelManager, error) {
|
||||
configDir := "/root/traefik/dynamic"
|
||||
|
||||
lm := &LabelManager{
|
||||
remoteHost: remoteHost,
|
||||
|
|
@ -38,132 +42,113 @@ func NewLabelManager(
|
|||
serviceName: serviceName,
|
||||
entrypoint: entrypoint,
|
||||
certResolver: certResolver,
|
||||
labels: make(map[string]bool),
|
||||
authLabels: make(map[string]bool),
|
||||
configDir: configDir,
|
||||
configs: make(map[string]bool),
|
||||
}
|
||||
|
||||
// Verify we can reach the Swarm manager and the service exists.
|
||||
cmd := fmt.Sprintf("docker service inspect --format '{{.Spec.Name}}' %s", serviceName)
|
||||
// Ensure the config directory exists.
|
||||
cmd := fmt.Sprintf("mkdir -p %s", configDir)
|
||||
if err := lm.runRemote(cmd); err != nil {
|
||||
log.Printf("WARN: could not verify service %s (may not exist yet): %v", serviceName, err)
|
||||
} else {
|
||||
log.Printf("Verified Swarm service: %s", serviceName)
|
||||
log.Printf("WARN: could not ensure config dir %s: %v", configDir, err)
|
||||
}
|
||||
|
||||
log.Printf("Label manager ready (host=%s, service=%s, ep=%s, resolver=%s)",
|
||||
remoteHost, serviceName, entrypoint, certResolver)
|
||||
log.Printf("Label manager ready (host=%s, service=%s, ep=%s, resolver=%s, dir=%s)",
|
||||
remoteHost, serviceName, entrypoint, certResolver, configDir)
|
||||
|
||||
return lm, nil
|
||||
}
|
||||
|
||||
// Add adds Traefik routing labels to the Swarm service for a tunnel.
|
||||
// If authUser and authPass are non-empty, a basicauth middleware is also added.
|
||||
// Add writes a Traefik dynamic config YAML to the remote host for a tunnel.
|
||||
// If authUser and authPass are non-empty, a basicauth middleware is included.
|
||||
func (lm *LabelManager) Add(tunKey, domain string, port int, authUser, authPass string) error {
|
||||
lm.mu.Lock()
|
||||
defer lm.mu.Unlock()
|
||||
|
||||
routerName := fmt.Sprintf("tunnel-%s-router", tunKey)
|
||||
serviceName := fmt.Sprintf("tunnel-%s-service", tunKey)
|
||||
middlewareName := fmt.Sprintf("tunnel-%s-auth", tunKey)
|
||||
svcName := fmt.Sprintf("tunnel-%s-service", tunKey)
|
||||
|
||||
// Build the label-add flags for docker service update.
|
||||
labelArgs := []string{
|
||||
labelFlag(fmt.Sprintf("traefik.http.routers.%s.rule", routerName),
|
||||
fmt.Sprintf("Host(`%s`)", domain)),
|
||||
labelFlag(fmt.Sprintf("traefik.http.routers.%s.entrypoints", routerName),
|
||||
lm.entrypoint),
|
||||
labelFlag(fmt.Sprintf("traefik.http.routers.%s.tls", routerName),
|
||||
"true"),
|
||||
labelFlag(fmt.Sprintf("traefik.http.routers.%s.tls.certresolver", routerName),
|
||||
lm.certResolver),
|
||||
labelFlag(fmt.Sprintf("traefik.http.routers.%s.service", routerName),
|
||||
serviceName),
|
||||
labelFlag(fmt.Sprintf("traefik.http.services.%s.loadbalancer.server.port", serviceName),
|
||||
fmt.Sprintf("%d", port)),
|
||||
}
|
||||
// Build optional middleware section.
|
||||
var middlewareYAML string
|
||||
var routerMiddleware string
|
||||
|
||||
// If auth credentials are provided, add basicauth middleware labels.
|
||||
if authUser != "" && authPass != "" {
|
||||
middlewareName := fmt.Sprintf("tunnel-%s-auth", tunKey)
|
||||
htpasswd, err := generateHTPasswd(authUser, authPass)
|
||||
if err != nil {
|
||||
return fmt.Errorf("generate htpasswd for %s: %w", domain, err)
|
||||
}
|
||||
labelArgs = append(labelArgs,
|
||||
labelFlag(
|
||||
fmt.Sprintf("traefik.http.middlewares.%s.basicauth.users", middlewareName),
|
||||
htpasswd,
|
||||
),
|
||||
labelFlag(
|
||||
fmt.Sprintf("traefik.http.routers.%s.middlewares", routerName),
|
||||
middlewareName,
|
||||
),
|
||||
)
|
||||
lm.authLabels[tunKey] = true
|
||||
log.Printf("BasicAuth middleware %s added for %s", middlewareName, domain)
|
||||
middlewareYAML = fmt.Sprintf(
|
||||
" middlewares:\n %s:\n basicAuth:\n users:\n - %q\n",
|
||||
middlewareName, htpasswd)
|
||||
routerMiddleware = fmt.Sprintf(
|
||||
"\n middlewares:\n - %s", middlewareName)
|
||||
log.Printf("BasicAuth middleware %s configured for %s", middlewareName, domain)
|
||||
}
|
||||
|
||||
cmd := fmt.Sprintf("docker service update --label-add %s %s",
|
||||
strings.Join(labelArgs, " --label-add "), lm.serviceName)
|
||||
yaml := fmt.Sprintf(
|
||||
"http:\n"+
|
||||
" routers:\n"+
|
||||
" %s:\n"+
|
||||
" rule: \"Host(`%s`)\"\n"+
|
||||
" entryPoints:\n"+
|
||||
" - %s\n"+
|
||||
" tls:\n"+
|
||||
" certResolver: %s\n"+
|
||||
" service: %s%s\n"+
|
||||
" services:\n"+
|
||||
" %s:\n"+
|
||||
" loadBalancer:\n"+
|
||||
" servers:\n"+
|
||||
" - url: \"http://%s:%d\"\n"+
|
||||
"%s",
|
||||
routerName,
|
||||
domain,
|
||||
lm.entrypoint,
|
||||
lm.certResolver,
|
||||
svcName,
|
||||
routerMiddleware,
|
||||
svcName,
|
||||
lm.serviceName,
|
||||
port,
|
||||
middlewareYAML,
|
||||
)
|
||||
|
||||
filePath := fmt.Sprintf("%s/tunnel-%s.yml", lm.configDir, tunKey)
|
||||
|
||||
// Write via a quoted heredoc ('TRAEFIKEOF') so the shell does not expand the $ or backticks in the YAML body.
|
||||
cmd := fmt.Sprintf("cat > '%s' << 'TRAEFIKEOF'\n%sTRAEFIKEOF", filePath, yaml)
|
||||
|
||||
if err := lm.runRemote(cmd); err != nil {
|
||||
return fmt.Errorf("add labels for %s: %w", domain, err)
|
||||
return fmt.Errorf("write config for %s: %w", domain, err)
|
||||
}
|
||||
|
||||
lm.labels[tunKey] = true
|
||||
log.Printf("Added Swarm labels: %s -> %s:%d", domain, lm.serviceName, port)
|
||||
lm.configs[tunKey] = true
|
||||
log.Printf("Wrote Traefik config: %s -> %s:%d (%s)", domain, lm.serviceName, port, filePath)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Remove removes Traefik routing labels from the Swarm service for a tunnel.
|
||||
// Remove deletes the Traefik dynamic config file for a tunnel.
|
||||
func (lm *LabelManager) Remove(tunKey string) error {
|
||||
lm.mu.Lock()
|
||||
defer lm.mu.Unlock()
|
||||
|
||||
if !lm.labels[tunKey] {
|
||||
return nil // nothing to remove
|
||||
if !lm.configs[tunKey] {
|
||||
return nil
|
||||
}
|
||||
|
||||
routerName := fmt.Sprintf("tunnel-%s-router", tunKey)
|
||||
serviceName := fmt.Sprintf("tunnel-%s-service", tunKey)
|
||||
|
||||
middlewareName := fmt.Sprintf("tunnel-%s-auth", tunKey)
|
||||
|
||||
// Build the label-rm flags.
|
||||
rmLabels := []string{
|
||||
fmt.Sprintf("traefik.http.routers.%s.rule", routerName),
|
||||
fmt.Sprintf("traefik.http.routers.%s.entrypoints", routerName),
|
||||
fmt.Sprintf("traefik.http.routers.%s.tls", routerName),
|
||||
fmt.Sprintf("traefik.http.routers.%s.tls.certresolver", routerName),
|
||||
fmt.Sprintf("traefik.http.routers.%s.service", routerName),
|
||||
fmt.Sprintf("traefik.http.services.%s.loadbalancer.server.port", serviceName),
|
||||
}
|
||||
|
||||
// Remove auth middleware labels if they were added.
|
||||
if lm.authLabels[tunKey] {
|
||||
rmLabels = append(rmLabels,
|
||||
fmt.Sprintf("traefik.http.middlewares.%s.basicauth.users", middlewareName),
|
||||
fmt.Sprintf("traefik.http.routers.%s.middlewares", routerName),
|
||||
)
|
||||
delete(lm.authLabels, tunKey)
|
||||
log.Printf("Removing BasicAuth middleware %s", middlewareName)
|
||||
}
|
||||
|
||||
cmd := fmt.Sprintf("docker service update --label-rm %s %s",
|
||||
strings.Join(rmLabels, " --label-rm "), lm.serviceName)
|
||||
filePath := fmt.Sprintf("%s/tunnel-%s.yml", lm.configDir, tunKey)
|
||||
cmd := fmt.Sprintf("rm -f '%s'", filePath)
|
||||
|
||||
if err := lm.runRemote(cmd); err != nil {
|
||||
return fmt.Errorf("remove labels for %s: %w", tunKey, err)
|
||||
return fmt.Errorf("remove config for %s: %w", tunKey, err)
|
||||
}
|
||||
|
||||
delete(lm.labels, tunKey)
|
||||
log.Printf("Removed Swarm labels for tunnel: %s", tunKey)
|
||||
delete(lm.configs, tunKey)
|
||||
log.Printf("Removed Traefik config: %s", filePath)
|
||||
return nil
|
||||
}
|
||||
|
||||
// generateHTPasswd creates a bcrypt-hashed htpasswd entry for Traefik basicauth.
|
||||
// The output format is user:$hash. Dollar signs are NOT doubled here because
|
||||
// the entry is written into the Traefik YAML config through a quoted heredoc,
|
||||
// which preserves them literally. Doubling is only needed in compose files.
|
||||
func generateHTPasswd(user, pass string) (string, error) {
|
||||
hash, err := bcrypt.GenerateFromPassword([]byte(pass), bcrypt.DefaultCost)
|
||||
if err != nil {
|
||||
|
|
@ -172,12 +157,7 @@ func generateHTPasswd(user, pass string) (string, error) {
|
|||
return fmt.Sprintf("%s:%s", user, string(hash)), nil
|
||||
}
|
||||
|
||||
// labelFlag formats a --label-add value, quoting properly for shell.
|
||||
func labelFlag(key, value string) string {
|
||||
return fmt.Sprintf("'%s=%s'", key, value)
|
||||
}
|
||||
|
||||
// runRemote executes a command on the Swarm manager via SSH.
|
||||
// runRemote executes a command on the Traefik host via SSH.
|
||||
func (lm *LabelManager) runRemote(cmd string) error {
|
||||
addr := lm.remoteHost
|
||||
if !strings.Contains(addr, ":") {
|
||||
|
|
|
|||
Loading…
Reference in New Issue