Rework for Swarm deploy on ingress.nixc.us

- labels.go: use `docker service update --label-add/rm` via SSH to
  dynamically manage Traefik labels on the Swarm service itself,
  matching how traefik-http discovers routes from Docker swarm labels
- stack.production.yml: constrain to ingress.nixc.us, host-mode port
  2222, base traefik.enable labels, SWARM_SERVICE_NAME env
- cmd/server/main.go: SWARM_SERVICE_NAME replaces TRAEFIK_CONFIG_DIR
- .woodpecker.yml: hardcode stack name better-argo-tunnels, update
  smoke test env vars

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Leopere 2026-02-08 18:24:13 -05:00
parent ccead8733a
commit b2820ed47f
Signed by: colin
SSH Key Fingerprint: SHA256:nRPCQTeMFLdGytxRQmPVK9VXY3/ePKQ5lGRyJhT5DY8
4 changed files with 99 additions and 84 deletions

View File

@ -49,7 +49,6 @@ steps:
- echo "$${REGISTRY_PASSWORD}" | docker login -u "$${REGISTRY_USER}" --password-stdin git.nixc.us
- apk add --no-cache git || true
- export GIT_COMMIT=$${CI_COMMIT_SHA}
- export GIT_COMMIT_DATE=$(git log -1 --format=%ci HEAD 2>/dev/null || echo "unknown")
- echo "Building GIT_COMMIT=$GIT_COMMIT"
# Build server image
- docker build --target server -t git.nixc.us/colin/better-argo-tunnels:production .
@ -79,7 +78,7 @@ steps:
- echo "$${REGISTRY_PASSWORD}" | docker login git.nixc.us -u "$${REGISTRY_USER}" --password-stdin
- docker pull git.nixc.us/colin/better-argo-tunnels:production
- docker rm -f tunnel-smoke || true
# Smoke: just verify the binary runs and prints startup log
# Smoke: verify the binary runs and prints startup log
- mkdir -p /tmp/smoke-keys
- ssh-keygen -t ed25519 -f /tmp/smoke-keys/host_key -N "" -q
- ssh-keygen -t ed25519 -f /tmp/smoke-keys/client_key -N "" -q
@ -91,7 +90,7 @@ steps:
-e AUTHORIZED_KEYS=/keys/authorized_keys \
-e TRAEFIK_SSH_HOST=127.0.0.1 \
-e TRAEFIK_SSH_KEY=/keys/host_key \
-e TRAEFIK_CONFIG_DIR=/tmp/dynamic \
-e SWARM_SERVICE_NAME=smoke-test \
-v /tmp/smoke-keys:/keys:ro \
git.nixc.us/colin/better-argo-tunnels:production
- sleep 3
@ -103,7 +102,7 @@ steps:
branch: main
event: [push, cron]
# Deploy to Swarm
# Deploy to Swarm on ingress.nixc.us
deploy-production:
name: deploy-production
image: woodpeckerci/plugin-docker-buildx
@ -129,7 +128,7 @@ steps:
- echo "$${REGISTRY_PASSWORD}" | docker login -u "$${REGISTRY_USER}" --password-stdin git.nixc.us
# Remove old stack
- echo "Removing old stack..."
- docker stack rm $${CI_REPO_NAME} || true
- docker stack rm better-argo-tunnels || true
- sleep 10
# Refresh secrets
- |
@ -148,7 +147,7 @@ steps:
docker secret ls | grep tunnel_
# Deploy stack
- echo "Deploying stack..."
- docker stack deploy --with-registry-auth -c ./stack.production.yml $${CI_REPO_NAME}
- docker stack deploy --with-registry-auth -c ./stack.production.yml better-argo-tunnels
when:
branch: main
event: [push, cron]

View File

@ -49,29 +49,29 @@ func main() {
portStart := envInt("PORT_RANGE_START", 10000)
portEnd := envInt("PORT_RANGE_END", 10100)
// Remote Traefik host config (SSH into the ingress host to manage routes).
// Swarm manager SSH config (for updating service labels).
traefikHost := envRequired("TRAEFIK_SSH_HOST")
traefikUser := envOr("TRAEFIK_SSH_USER", "root")
traefikKey := envRequired("TRAEFIK_SSH_KEY")
traefikConfigDir := envOr("TRAEFIK_CONFIG_DIR", "/root/traefik/dynamic")
serviceName := envOr("SWARM_SERVICE_NAME", "better-argo-tunnels_tunnel-server")
entrypoint := envOr("TRAEFIK_ENTRYPOINT", "websecure")
certResolver := envOr("TRAEFIK_CERT_RESOLVER", "letsencryptresolver")
// Load the SSH key for connecting to the Traefik host.
// Load the SSH key for connecting to the Swarm manager.
traefikSigner, err := server.LoadSigner(traefikKey)
if err != nil {
log.Fatalf("Failed to load Traefik SSH key: %v", err)
}
log.Printf("Loaded Traefik host SSH key")
log.Printf("Loaded Swarm manager SSH key")
// Initialize port pool.
pool := server.NewPortPool(portStart, portEnd)
log.Printf("Port pool: %d-%d (%d ports)", portStart, portEnd, portEnd-portStart+1)
// Initialize Traefik label manager (remote SSH).
// Initialize label manager (Swarm service update via SSH).
labels, err := server.NewLabelManager(
traefikHost, traefikUser, traefikSigner,
traefikConfigDir, entrypoint, certResolver,
serviceName, entrypoint, certResolver,
)
if err != nil {
log.Fatalf("Failed to init label manager: %v", err)

View File

@ -9,85 +9,126 @@ import (
"golang.org/x/crypto/ssh"
)
// LabelManager manages Traefik dynamic config on a remote host via SSH.
// It SSHs into the Traefik host and writes per-tunnel YAML config files
// into the Traefik file provider directory.
// LabelManager manages Traefik routing labels on its own Swarm service
// by SSHing into the Swarm manager and running docker service update.
type LabelManager struct {
mu sync.Mutex
remoteHost string // e.g. "ingress.nixc.us" or "ingress.nixc.us:22"
remoteUser string // SSH user on the Traefik host
remoteHost string // Swarm manager, e.g. "ingress.nixc.us"
remoteUser string // SSH user
signer ssh.Signer
configDir string // remote path where Traefik watches for file provider
serviceName string // Swarm service name, e.g. "better-argo-tunnels_tunnel-server"
entrypoint string // e.g. "websecure"
certResolver string // e.g. "letsencryptresolver"
labels map[string]bool // track which tunnel keys we've added
}
// NewLabelManager creates a label manager that writes Traefik config via SSH.
// NewLabelManager creates a label manager that updates Swarm service labels via SSH.
func NewLabelManager(
remoteHost, remoteUser string,
signer ssh.Signer,
configDir, entrypoint, certResolver string,
serviceName, entrypoint, certResolver string,
) (*LabelManager, error) {
lm := &LabelManager{
remoteHost: remoteHost,
remoteUser: remoteUser,
signer: signer,
configDir: configDir,
serviceName: serviceName,
entrypoint: entrypoint,
certResolver: certResolver,
labels: make(map[string]bool),
}
// Ensure the remote config directory exists.
if err := lm.runRemote(fmt.Sprintf("mkdir -p %s", configDir)); err != nil {
return nil, fmt.Errorf("ensure remote config dir: %w", err)
// Verify we can reach the Swarm manager and the service exists.
cmd := fmt.Sprintf("docker service inspect --format '{{.Spec.Name}}' %s", serviceName)
if err := lm.runRemote(cmd); err != nil {
log.Printf("WARN: could not verify service %s (may not exist yet): %v", serviceName, err)
} else {
log.Printf("Verified Swarm service: %s", serviceName)
}
log.Printf("Label manager ready (host=%s, dir=%s, ep=%s, resolver=%s)",
remoteHost, configDir, entrypoint, certResolver)
log.Printf("Label manager ready (host=%s, service=%s, ep=%s, resolver=%s)",
remoteHost, serviceName, entrypoint, certResolver)
return lm, nil
}
// Add writes a Traefik dynamic config file on the remote host for a tunnel.
// Add adds Traefik routing labels to the Swarm service for a tunnel.
func (lm *LabelManager) Add(tunKey, domain string, port int) error {
lm.mu.Lock()
defer lm.mu.Unlock()
routerName := fmt.Sprintf("tunnel-%s-router", tunKey)
serviceName := fmt.Sprintf("tunnel-%s-service", tunKey)
cfg := buildRouteConfig(routerName, serviceName, domain, port, lm.entrypoint, lm.certResolver)
remotePath := fmt.Sprintf("%s/tunnel-%s.yml", lm.configDir, tunKey)
// Write the config file via SSH using cat heredoc.
cmd := fmt.Sprintf("cat > %s << 'TRAEFIKEOF'\n%sTRAEFIKEOF", remotePath, cfg)
if err := lm.runRemote(cmd); err != nil {
return fmt.Errorf("write remote config %s: %w", remotePath, err)
// Build the label-add flags for docker service update.
labelArgs := []string{
labelFlag(fmt.Sprintf("traefik.http.routers.%s.rule", routerName),
fmt.Sprintf("Host(`%s`)", domain)),
labelFlag(fmt.Sprintf("traefik.http.routers.%s.entrypoints", routerName),
lm.entrypoint),
labelFlag(fmt.Sprintf("traefik.http.routers.%s.tls", routerName),
"true"),
labelFlag(fmt.Sprintf("traefik.http.routers.%s.tls.certresolver", routerName),
lm.certResolver),
labelFlag(fmt.Sprintf("traefik.http.routers.%s.service", routerName),
serviceName),
labelFlag(fmt.Sprintf("traefik.http.services.%s.loadbalancer.server.port", serviceName),
fmt.Sprintf("%d", port)),
}
log.Printf("Wrote remote Traefik config: %s (domain=%s port=%d)", remotePath, domain, port)
cmd := fmt.Sprintf("docker service update --label-add %s %s",
strings.Join(labelArgs, " --label-add "), lm.serviceName)
if err := lm.runRemote(cmd); err != nil {
return fmt.Errorf("add labels for %s: %w", domain, err)
}
lm.labels[tunKey] = true
log.Printf("Added Swarm labels: %s -> %s:%d", domain, lm.serviceName, port)
return nil
}
// Remove deletes the Traefik dynamic config file on the remote host.
// Remove removes Traefik routing labels from the Swarm service for a tunnel.
func (lm *LabelManager) Remove(tunKey string) error {
lm.mu.Lock()
defer lm.mu.Unlock()
remotePath := fmt.Sprintf("%s/tunnel-%s.yml", lm.configDir, tunKey)
cmd := fmt.Sprintf("rm -f %s", remotePath)
if err := lm.runRemote(cmd); err != nil {
return fmt.Errorf("remove remote config %s: %w", remotePath, err)
if !lm.labels[tunKey] {
return nil // nothing to remove
}
log.Printf("Removed remote Traefik config: %s", remotePath)
routerName := fmt.Sprintf("tunnel-%s-router", tunKey)
serviceName := fmt.Sprintf("tunnel-%s-service", tunKey)
// Build the label-rm flags.
rmLabels := []string{
fmt.Sprintf("traefik.http.routers.%s.rule", routerName),
fmt.Sprintf("traefik.http.routers.%s.entrypoints", routerName),
fmt.Sprintf("traefik.http.routers.%s.tls", routerName),
fmt.Sprintf("traefik.http.routers.%s.tls.certresolver", routerName),
fmt.Sprintf("traefik.http.routers.%s.service", routerName),
fmt.Sprintf("traefik.http.services.%s.loadbalancer.server.port", serviceName),
}
cmd := fmt.Sprintf("docker service update --label-rm %s %s",
strings.Join(rmLabels, " --label-rm "), lm.serviceName)
if err := lm.runRemote(cmd); err != nil {
return fmt.Errorf("remove labels for %s: %w", tunKey, err)
}
delete(lm.labels, tunKey)
log.Printf("Removed Swarm labels for tunnel: %s", tunKey)
return nil
}
// runRemote executes a command on the remote Traefik host via SSH.
// labelFlag formats a single --label-add argument as 'key=value',
// single-quoted so the remote shell treats it as one word.
//
// Embedded single quotes in either part are escaped using the standard
// POSIX idiom ('\''), closing the quoted region, emitting a literal
// quote, and reopening it. Without this, a value containing ' (e.g. a
// client-supplied domain in a Host() rule) would break out of the
// quoting and inject into the docker service update command line.
func labelFlag(key, value string) string {
	quote := func(s string) string {
		return strings.ReplaceAll(s, "'", `'\''`)
	}
	return fmt.Sprintf("'%s=%s'", quote(key), quote(value))
}
// runRemote executes a command on the Swarm manager via SSH.
func (lm *LabelManager) runRemote(cmd string) error {
addr := lm.remoteHost
if !strings.Contains(addr, ":") {
@ -122,39 +163,6 @@ func (lm *LabelManager) runRemote(cmd string) error {
return nil
}
// buildRouteConfig generates Traefik dynamic config YAML (file provider
// format) for one tunnel: an http router that matches the tunnel's domain
// and terminates TLS via the given cert resolver, plus a matching service
// whose loadBalancer points at the tunnel-server on the allocated port.
//
// NOTE(review): the leading spaces inside the format strings below appear
// to have been collapsed in this rendering; the emitted YAML depends on
// that indentation for nesting — confirm against the original file.
func buildRouteConfig(
routerName, serviceName, domain string,
port int,
entrypoint, certResolver string,
) string {
var b strings.Builder
// Header warns operators this file is machine-managed (it is created
// and deleted over SSH by the LabelManager).
b.WriteString("# Auto-generated by tunnel-server. Do not edit.\n")
b.WriteString("http:\n")
// Router: Host() rule for the tunnel domain, bound to the configured
// entrypoint, with TLS issued by certResolver.
b.WriteString(" routers:\n")
b.WriteString(fmt.Sprintf(" %s:\n", routerName))
b.WriteString(fmt.Sprintf(" rule: \"Host(`%s`)\"\n", domain))
b.WriteString(" entryPoints:\n")
b.WriteString(fmt.Sprintf(" - %s\n", entrypoint))
b.WriteString(" tls:\n")
b.WriteString(fmt.Sprintf(" certResolver: %s\n", certResolver))
b.WriteString(fmt.Sprintf(" service: %s\n", serviceName))
// Service — points to the tunnel-server's allocated port.
// The tunnel-server container is on the same network as Traefik,
// so Traefik can reach it by the "tunnel-server" container name.
b.WriteString(" services:\n")
b.WriteString(fmt.Sprintf(" %s:\n", serviceName))
b.WriteString(" loadBalancer:\n")
b.WriteString(" servers:\n")
b.WriteString(fmt.Sprintf(" - url: \"http://tunnel-server:%d\"\n", port))
return b.String()
}
// Close is a no-op — SSH connections are opened/closed per operation.
func (lm *LabelManager) Close() error {
return nil

View File

@ -34,7 +34,7 @@ services:
TRAEFIK_SSH_HOST: "ingress.nixc.us"
TRAEFIK_SSH_USER: "root"
TRAEFIK_SSH_KEY: "/run/secrets/traefik_deploy_key"
TRAEFIK_CONFIG_DIR: "/root/traefik/dynamic"
SWARM_SERVICE_NAME: "better-argo-tunnels_tunnel-server"
TRAEFIK_ENTRYPOINT: "websecure"
TRAEFIK_CERT_RESOLVER: "letsencryptresolver"
HOSTNAME: "{{.Node.Hostname}}"
@ -43,16 +43,24 @@ services:
TASK_ID: "{{.Task.ID}}"
ENVIRONMENT: "production"
ports:
- "2222:2222"
- "10000-10100:10000-10100"
- target: 2222
published: 2222
protocol: tcp
mode: host
deploy:
replicas: 1
placement:
constraints:
- node.hostname == macmini1
- node.hostname == ingress.nixc.us
labels:
traefik.enable: "true"
traefik.tcp.routers.tunnel-ssh-router.rule: "HostSNI(`*`)"
traefik.tcp.routers.tunnel-ssh-router.entrypoints: "ssh"
traefik.tcp.services.tunnel-ssh-service.loadbalancer.server.port: "2222"
traefik.docker.network: "traefik"
# Dynamic tunnel labels are added at runtime via docker service update.
# The base labels below just enable Traefik discovery.
update_config:
order: stop-first
failure_action: rollback
delay: 0s
parallelism: 1
restart_policy:
condition: on-failure