Rework for Swarm deploy on ingress.nixc.us

- labels.go: use `docker service update --label-add/rm` via SSH to
  dynamically manage Traefik labels on the Swarm service itself,
  matching how traefik-http discovers routes from Docker swarm labels
- stack.production.yml: constrain to ingress.nixc.us, host-mode port
  2222, base traefik.enable labels, SWARM_SERVICE_NAME env
- cmd/server/main.go: SWARM_SERVICE_NAME replaces TRAEFIK_CONFIG_DIR
- .woodpecker.yml: hardcode stack name better-argo-tunnels, update
  smoke test env vars

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Leopere 2026-02-08 18:24:13 -05:00
parent ccead8733a
commit b2820ed47f
Signed by: colin
SSH Key Fingerprint: SHA256:nRPCQTeMFLdGytxRQmPVK9VXY3/ePKQ5lGRyJhT5DY8
4 changed files with 99 additions and 84 deletions

View File

@ -49,7 +49,6 @@ steps:
- echo "$${REGISTRY_PASSWORD}" | docker login -u "$${REGISTRY_USER}" --password-stdin git.nixc.us - echo "$${REGISTRY_PASSWORD}" | docker login -u "$${REGISTRY_USER}" --password-stdin git.nixc.us
- apk add --no-cache git || true - apk add --no-cache git || true
- export GIT_COMMIT=$${CI_COMMIT_SHA} - export GIT_COMMIT=$${CI_COMMIT_SHA}
- export GIT_COMMIT_DATE=$(git log -1 --format=%ci HEAD 2>/dev/null || echo "unknown")
- echo "Building GIT_COMMIT=$GIT_COMMIT" - echo "Building GIT_COMMIT=$GIT_COMMIT"
# Build server image # Build server image
- docker build --target server -t git.nixc.us/colin/better-argo-tunnels:production . - docker build --target server -t git.nixc.us/colin/better-argo-tunnels:production .
@ -79,7 +78,7 @@ steps:
- echo "$${REGISTRY_PASSWORD}" | docker login git.nixc.us -u "$${REGISTRY_USER}" --password-stdin - echo "$${REGISTRY_PASSWORD}" | docker login git.nixc.us -u "$${REGISTRY_USER}" --password-stdin
- docker pull git.nixc.us/colin/better-argo-tunnels:production - docker pull git.nixc.us/colin/better-argo-tunnels:production
- docker rm -f tunnel-smoke || true - docker rm -f tunnel-smoke || true
# Smoke: just verify the binary runs and prints startup log # Smoke: verify the binary runs and prints startup log
- mkdir -p /tmp/smoke-keys - mkdir -p /tmp/smoke-keys
- ssh-keygen -t ed25519 -f /tmp/smoke-keys/host_key -N "" -q - ssh-keygen -t ed25519 -f /tmp/smoke-keys/host_key -N "" -q
- ssh-keygen -t ed25519 -f /tmp/smoke-keys/client_key -N "" -q - ssh-keygen -t ed25519 -f /tmp/smoke-keys/client_key -N "" -q
@ -91,7 +90,7 @@ steps:
-e AUTHORIZED_KEYS=/keys/authorized_keys \ -e AUTHORIZED_KEYS=/keys/authorized_keys \
-e TRAEFIK_SSH_HOST=127.0.0.1 \ -e TRAEFIK_SSH_HOST=127.0.0.1 \
-e TRAEFIK_SSH_KEY=/keys/host_key \ -e TRAEFIK_SSH_KEY=/keys/host_key \
-e TRAEFIK_CONFIG_DIR=/tmp/dynamic \ -e SWARM_SERVICE_NAME=smoke-test \
-v /tmp/smoke-keys:/keys:ro \ -v /tmp/smoke-keys:/keys:ro \
git.nixc.us/colin/better-argo-tunnels:production git.nixc.us/colin/better-argo-tunnels:production
- sleep 3 - sleep 3
@ -103,7 +102,7 @@ steps:
branch: main branch: main
event: [push, cron] event: [push, cron]
# Deploy to Swarm # Deploy to Swarm on ingress.nixc.us
deploy-production: deploy-production:
name: deploy-production name: deploy-production
image: woodpeckerci/plugin-docker-buildx image: woodpeckerci/plugin-docker-buildx
@ -129,7 +128,7 @@ steps:
- echo "$${REGISTRY_PASSWORD}" | docker login -u "$${REGISTRY_USER}" --password-stdin git.nixc.us - echo "$${REGISTRY_PASSWORD}" | docker login -u "$${REGISTRY_USER}" --password-stdin git.nixc.us
# Remove old stack # Remove old stack
- echo "Removing old stack..." - echo "Removing old stack..."
- docker stack rm $${CI_REPO_NAME} || true - docker stack rm better-argo-tunnels || true
- sleep 10 - sleep 10
# Refresh secrets # Refresh secrets
- | - |
@ -148,7 +147,7 @@ steps:
docker secret ls | grep tunnel_ docker secret ls | grep tunnel_
# Deploy stack # Deploy stack
- echo "Deploying stack..." - echo "Deploying stack..."
- docker stack deploy --with-registry-auth -c ./stack.production.yml $${CI_REPO_NAME} - docker stack deploy --with-registry-auth -c ./stack.production.yml better-argo-tunnels
when: when:
branch: main branch: main
event: [push, cron] event: [push, cron]

View File

@ -49,29 +49,29 @@ func main() {
portStart := envInt("PORT_RANGE_START", 10000) portStart := envInt("PORT_RANGE_START", 10000)
portEnd := envInt("PORT_RANGE_END", 10100) portEnd := envInt("PORT_RANGE_END", 10100)
// Remote Traefik host config (SSH into the ingress host to manage routes). // Swarm manager SSH config (for updating service labels).
traefikHost := envRequired("TRAEFIK_SSH_HOST") traefikHost := envRequired("TRAEFIK_SSH_HOST")
traefikUser := envOr("TRAEFIK_SSH_USER", "root") traefikUser := envOr("TRAEFIK_SSH_USER", "root")
traefikKey := envRequired("TRAEFIK_SSH_KEY") traefikKey := envRequired("TRAEFIK_SSH_KEY")
traefikConfigDir := envOr("TRAEFIK_CONFIG_DIR", "/root/traefik/dynamic") serviceName := envOr("SWARM_SERVICE_NAME", "better-argo-tunnels_tunnel-server")
entrypoint := envOr("TRAEFIK_ENTRYPOINT", "websecure") entrypoint := envOr("TRAEFIK_ENTRYPOINT", "websecure")
certResolver := envOr("TRAEFIK_CERT_RESOLVER", "letsencryptresolver") certResolver := envOr("TRAEFIK_CERT_RESOLVER", "letsencryptresolver")
// Load the SSH key for connecting to the Traefik host. // Load the SSH key for connecting to the Swarm manager.
traefikSigner, err := server.LoadSigner(traefikKey) traefikSigner, err := server.LoadSigner(traefikKey)
if err != nil { if err != nil {
log.Fatalf("Failed to load Traefik SSH key: %v", err) log.Fatalf("Failed to load Traefik SSH key: %v", err)
} }
log.Printf("Loaded Traefik host SSH key") log.Printf("Loaded Swarm manager SSH key")
// Initialize port pool. // Initialize port pool.
pool := server.NewPortPool(portStart, portEnd) pool := server.NewPortPool(portStart, portEnd)
log.Printf("Port pool: %d-%d (%d ports)", portStart, portEnd, portEnd-portStart+1) log.Printf("Port pool: %d-%d (%d ports)", portStart, portEnd, portEnd-portStart+1)
// Initialize Traefik label manager (remote SSH). // Initialize label manager (Swarm service update via SSH).
labels, err := server.NewLabelManager( labels, err := server.NewLabelManager(
traefikHost, traefikUser, traefikSigner, traefikHost, traefikUser, traefikSigner,
traefikConfigDir, entrypoint, certResolver, serviceName, entrypoint, certResolver,
) )
if err != nil { if err != nil {
log.Fatalf("Failed to init label manager: %v", err) log.Fatalf("Failed to init label manager: %v", err)

View File

@ -9,85 +9,126 @@ import (
"golang.org/x/crypto/ssh" "golang.org/x/crypto/ssh"
) )
// LabelManager manages Traefik dynamic config on a remote host via SSH. // LabelManager manages Traefik routing labels on its own Swarm service
// It SSHs into the Traefik host and writes per-tunnel YAML config files // by SSHing into the Swarm manager and running docker service update.
// into the Traefik file provider directory.
type LabelManager struct { type LabelManager struct {
mu sync.Mutex mu sync.Mutex
remoteHost string // e.g. "ingress.nixc.us" or "ingress.nixc.us:22" remoteHost string // Swarm manager, e.g. "ingress.nixc.us"
remoteUser string // SSH user on the Traefik host remoteUser string // SSH user
signer ssh.Signer signer ssh.Signer
configDir string // remote path where Traefik watches for file provider serviceName string // Swarm service name, e.g. "better-argo-tunnels_tunnel-server"
entrypoint string // e.g. "websecure" entrypoint string // e.g. "websecure"
certResolver string // e.g. "letsencryptresolver" certResolver string // e.g. "letsencryptresolver"
labels map[string]bool // track which tunnel keys we've added
} }
// NewLabelManager creates a label manager that writes Traefik config via SSH. // NewLabelManager creates a label manager that updates Swarm service labels via SSH.
func NewLabelManager( func NewLabelManager(
remoteHost, remoteUser string, remoteHost, remoteUser string,
signer ssh.Signer, signer ssh.Signer,
configDir, entrypoint, certResolver string, serviceName, entrypoint, certResolver string,
) (*LabelManager, error) { ) (*LabelManager, error) {
lm := &LabelManager{ lm := &LabelManager{
remoteHost: remoteHost, remoteHost: remoteHost,
remoteUser: remoteUser, remoteUser: remoteUser,
signer: signer, signer: signer,
configDir: configDir, serviceName: serviceName,
entrypoint: entrypoint, entrypoint: entrypoint,
certResolver: certResolver, certResolver: certResolver,
labels: make(map[string]bool),
} }
// Ensure the remote config directory exists. // Verify we can reach the Swarm manager and the service exists.
if err := lm.runRemote(fmt.Sprintf("mkdir -p %s", configDir)); err != nil { cmd := fmt.Sprintf("docker service inspect --format '{{.Spec.Name}}' %s", serviceName)
return nil, fmt.Errorf("ensure remote config dir: %w", err) if err := lm.runRemote(cmd); err != nil {
log.Printf("WARN: could not verify service %s (may not exist yet): %v", serviceName, err)
} else {
log.Printf("Verified Swarm service: %s", serviceName)
} }
log.Printf("Label manager ready (host=%s, dir=%s, ep=%s, resolver=%s)", log.Printf("Label manager ready (host=%s, service=%s, ep=%s, resolver=%s)",
remoteHost, configDir, entrypoint, certResolver) remoteHost, serviceName, entrypoint, certResolver)
return lm, nil return lm, nil
} }
// Add writes a Traefik dynamic config file on the remote host for a tunnel. // Add adds Traefik routing labels to the Swarm service for a tunnel.
func (lm *LabelManager) Add(tunKey, domain string, port int) error { func (lm *LabelManager) Add(tunKey, domain string, port int) error {
lm.mu.Lock() lm.mu.Lock()
defer lm.mu.Unlock() defer lm.mu.Unlock()
routerName := fmt.Sprintf("tunnel-%s-router", tunKey) routerName := fmt.Sprintf("tunnel-%s-router", tunKey)
serviceName := fmt.Sprintf("tunnel-%s-service", tunKey) serviceName := fmt.Sprintf("tunnel-%s-service", tunKey)
cfg := buildRouteConfig(routerName, serviceName, domain, port, lm.entrypoint, lm.certResolver)
remotePath := fmt.Sprintf("%s/tunnel-%s.yml", lm.configDir, tunKey) // Build the label-add flags for docker service update.
labelArgs := []string{
// Write the config file via SSH using cat heredoc. labelFlag(fmt.Sprintf("traefik.http.routers.%s.rule", routerName),
cmd := fmt.Sprintf("cat > %s << 'TRAEFIKEOF'\n%sTRAEFIKEOF", remotePath, cfg) fmt.Sprintf("Host(`%s`)", domain)),
labelFlag(fmt.Sprintf("traefik.http.routers.%s.entrypoints", routerName),
if err := lm.runRemote(cmd); err != nil { lm.entrypoint),
return fmt.Errorf("write remote config %s: %w", remotePath, err) labelFlag(fmt.Sprintf("traefik.http.routers.%s.tls", routerName),
"true"),
labelFlag(fmt.Sprintf("traefik.http.routers.%s.tls.certresolver", routerName),
lm.certResolver),
labelFlag(fmt.Sprintf("traefik.http.routers.%s.service", routerName),
serviceName),
labelFlag(fmt.Sprintf("traefik.http.services.%s.loadbalancer.server.port", serviceName),
fmt.Sprintf("%d", port)),
} }
log.Printf("Wrote remote Traefik config: %s (domain=%s port=%d)", remotePath, domain, port) cmd := fmt.Sprintf("docker service update --label-add %s %s",
strings.Join(labelArgs, " --label-add "), lm.serviceName)
if err := lm.runRemote(cmd); err != nil {
return fmt.Errorf("add labels for %s: %w", domain, err)
}
lm.labels[tunKey] = true
log.Printf("Added Swarm labels: %s -> %s:%d", domain, lm.serviceName, port)
return nil return nil
} }
// Remove deletes the Traefik dynamic config file on the remote host. // Remove removes Traefik routing labels from the Swarm service for a tunnel.
func (lm *LabelManager) Remove(tunKey string) error { func (lm *LabelManager) Remove(tunKey string) error {
lm.mu.Lock() lm.mu.Lock()
defer lm.mu.Unlock() defer lm.mu.Unlock()
remotePath := fmt.Sprintf("%s/tunnel-%s.yml", lm.configDir, tunKey) if !lm.labels[tunKey] {
cmd := fmt.Sprintf("rm -f %s", remotePath) return nil // nothing to remove
if err := lm.runRemote(cmd); err != nil {
return fmt.Errorf("remove remote config %s: %w", remotePath, err)
} }
log.Printf("Removed remote Traefik config: %s", remotePath) routerName := fmt.Sprintf("tunnel-%s-router", tunKey)
serviceName := fmt.Sprintf("tunnel-%s-service", tunKey)
// Build the label-rm flags.
rmLabels := []string{
fmt.Sprintf("traefik.http.routers.%s.rule", routerName),
fmt.Sprintf("traefik.http.routers.%s.entrypoints", routerName),
fmt.Sprintf("traefik.http.routers.%s.tls", routerName),
fmt.Sprintf("traefik.http.routers.%s.tls.certresolver", routerName),
fmt.Sprintf("traefik.http.routers.%s.service", routerName),
fmt.Sprintf("traefik.http.services.%s.loadbalancer.server.port", serviceName),
}
cmd := fmt.Sprintf("docker service update --label-rm %s %s",
strings.Join(rmLabels, " --label-rm "), lm.serviceName)
if err := lm.runRemote(cmd); err != nil {
return fmt.Errorf("remove labels for %s: %w", tunKey, err)
}
delete(lm.labels, tunKey)
log.Printf("Removed Swarm labels for tunnel: %s", tunKey)
return nil return nil
} }
// runRemote executes a command on the remote Traefik host via SSH. // labelFlag formats a --label-add value, quoting properly for shell.
func labelFlag(key, value string) string {
return fmt.Sprintf("'%s=%s'", key, value)
}
// runRemote executes a command on the Swarm manager via SSH.
func (lm *LabelManager) runRemote(cmd string) error { func (lm *LabelManager) runRemote(cmd string) error {
addr := lm.remoteHost addr := lm.remoteHost
if !strings.Contains(addr, ":") { if !strings.Contains(addr, ":") {
@ -122,39 +163,6 @@ func (lm *LabelManager) runRemote(cmd string) error {
return nil return nil
} }
// buildRouteConfig generates Traefik dynamic config YAML for one tunnel.
func buildRouteConfig(
routerName, serviceName, domain string,
port int,
entrypoint, certResolver string,
) string {
var b strings.Builder
b.WriteString("# Auto-generated by tunnel-server. Do not edit.\n")
b.WriteString("http:\n")
// Router
b.WriteString(" routers:\n")
b.WriteString(fmt.Sprintf(" %s:\n", routerName))
b.WriteString(fmt.Sprintf(" rule: \"Host(`%s`)\"\n", domain))
b.WriteString(" entryPoints:\n")
b.WriteString(fmt.Sprintf(" - %s\n", entrypoint))
b.WriteString(" tls:\n")
b.WriteString(fmt.Sprintf(" certResolver: %s\n", certResolver))
b.WriteString(fmt.Sprintf(" service: %s\n", serviceName))
// Service — points to the tunnel-server's allocated port.
// The tunnel-server container is on the same network as Traefik,
// so Traefik can reach it by container name or IP.
b.WriteString(" services:\n")
b.WriteString(fmt.Sprintf(" %s:\n", serviceName))
b.WriteString(" loadBalancer:\n")
b.WriteString(" servers:\n")
b.WriteString(fmt.Sprintf(" - url: \"http://tunnel-server:%d\"\n", port))
return b.String()
}
// Close is a no-op — SSH connections are opened/closed per operation. // Close is a no-op — SSH connections are opened/closed per operation.
func (lm *LabelManager) Close() error { func (lm *LabelManager) Close() error {
return nil return nil

View File

@ -34,7 +34,7 @@ services:
TRAEFIK_SSH_HOST: "ingress.nixc.us" TRAEFIK_SSH_HOST: "ingress.nixc.us"
TRAEFIK_SSH_USER: "root" TRAEFIK_SSH_USER: "root"
TRAEFIK_SSH_KEY: "/run/secrets/traefik_deploy_key" TRAEFIK_SSH_KEY: "/run/secrets/traefik_deploy_key"
TRAEFIK_CONFIG_DIR: "/root/traefik/dynamic" SWARM_SERVICE_NAME: "better-argo-tunnels_tunnel-server"
TRAEFIK_ENTRYPOINT: "websecure" TRAEFIK_ENTRYPOINT: "websecure"
TRAEFIK_CERT_RESOLVER: "letsencryptresolver" TRAEFIK_CERT_RESOLVER: "letsencryptresolver"
HOSTNAME: "{{.Node.Hostname}}" HOSTNAME: "{{.Node.Hostname}}"
@ -43,16 +43,24 @@ services:
TASK_ID: "{{.Task.ID}}" TASK_ID: "{{.Task.ID}}"
ENVIRONMENT: "production" ENVIRONMENT: "production"
ports: ports:
- "2222:2222" - target: 2222
- "10000-10100:10000-10100" published: 2222
protocol: tcp
mode: host
deploy: deploy:
replicas: 1 replicas: 1
placement: placement:
constraints: constraints:
- node.hostname == macmini1 - node.hostname == ingress.nixc.us
labels: labels:
traefik.enable: "true" traefik.enable: "true"
traefik.tcp.routers.tunnel-ssh-router.rule: "HostSNI(`*`)"
traefik.tcp.routers.tunnel-ssh-router.entrypoints: "ssh"
traefik.tcp.services.tunnel-ssh-service.loadbalancer.server.port: "2222"
traefik.docker.network: "traefik" traefik.docker.network: "traefik"
# Dynamic tunnel labels are added at runtime via docker service update.
# The base labels above just enable Traefik discovery. # The base labels above just enable Traefik discovery.
update_config:
order: stop-first
failure_action: rollback
delay: 0s
parallelism: 1
restart_policy:
condition: on-failure