Rework for Swarm deploy on ingress.nixc.us
- labels.go: use `docker service update --label-add/--label-rm` via SSH to dynamically manage Traefik labels on the Swarm service itself, matching how traefik-http discovers routes from Docker Swarm labels
- stack.production.yml: constrain to ingress.nixc.us, host-mode port 2222, base traefik.enable labels, SWARM_SERVICE_NAME env
- cmd/server/main.go: SWARM_SERVICE_NAME replaces TRAEFIK_CONFIG_DIR
- .woodpecker.yml: hardcode stack name better-argo-tunnels, update smoke test env vars

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
parent
ccead8733a
commit
b2820ed47f
|
|
@ -49,7 +49,6 @@ steps:
|
|||
- echo "$${REGISTRY_PASSWORD}" | docker login -u "$${REGISTRY_USER}" --password-stdin git.nixc.us
|
||||
- apk add --no-cache git || true
|
||||
- export GIT_COMMIT=$${CI_COMMIT_SHA}
|
||||
- export GIT_COMMIT_DATE=$(git log -1 --format=%ci HEAD 2>/dev/null || echo "unknown")
|
||||
- echo "Building GIT_COMMIT=$GIT_COMMIT"
|
||||
# Build server image
|
||||
- docker build --target server -t git.nixc.us/colin/better-argo-tunnels:production .
|
||||
|
|
@ -79,7 +78,7 @@ steps:
|
|||
- echo "$${REGISTRY_PASSWORD}" | docker login git.nixc.us -u "$${REGISTRY_USER}" --password-stdin
|
||||
- docker pull git.nixc.us/colin/better-argo-tunnels:production
|
||||
- docker rm -f tunnel-smoke || true
|
||||
# Smoke: just verify the binary runs and prints startup log
|
||||
# Smoke: verify the binary runs and prints startup log
|
||||
- mkdir -p /tmp/smoke-keys
|
||||
- ssh-keygen -t ed25519 -f /tmp/smoke-keys/host_key -N "" -q
|
||||
- ssh-keygen -t ed25519 -f /tmp/smoke-keys/client_key -N "" -q
|
||||
|
|
@ -91,7 +90,7 @@ steps:
|
|||
-e AUTHORIZED_KEYS=/keys/authorized_keys \
|
||||
-e TRAEFIK_SSH_HOST=127.0.0.1 \
|
||||
-e TRAEFIK_SSH_KEY=/keys/host_key \
|
||||
-e TRAEFIK_CONFIG_DIR=/tmp/dynamic \
|
||||
-e SWARM_SERVICE_NAME=smoke-test \
|
||||
-v /tmp/smoke-keys:/keys:ro \
|
||||
git.nixc.us/colin/better-argo-tunnels:production
|
||||
- sleep 3
|
||||
|
|
@ -103,7 +102,7 @@ steps:
|
|||
branch: main
|
||||
event: [push, cron]
|
||||
|
||||
# Deploy to Swarm
|
||||
# Deploy to Swarm on ingress.nixc.us
|
||||
deploy-production:
|
||||
name: deploy-production
|
||||
image: woodpeckerci/plugin-docker-buildx
|
||||
|
|
@ -129,7 +128,7 @@ steps:
|
|||
- echo "$${REGISTRY_PASSWORD}" | docker login -u "$${REGISTRY_USER}" --password-stdin git.nixc.us
|
||||
# Remove old stack
|
||||
- echo "Removing old stack..."
|
||||
- docker stack rm $${CI_REPO_NAME} || true
|
||||
- docker stack rm better-argo-tunnels || true
|
||||
- sleep 10
|
||||
# Refresh secrets
|
||||
- |
|
||||
|
|
@ -148,7 +147,7 @@ steps:
|
|||
docker secret ls | grep tunnel_
|
||||
# Deploy stack
|
||||
- echo "Deploying stack..."
|
||||
- docker stack deploy --with-registry-auth -c ./stack.production.yml $${CI_REPO_NAME}
|
||||
- docker stack deploy --with-registry-auth -c ./stack.production.yml better-argo-tunnels
|
||||
when:
|
||||
branch: main
|
||||
event: [push, cron]
|
||||
|
|
|
|||
|
|
@ -49,29 +49,29 @@ func main() {
|
|||
portStart := envInt("PORT_RANGE_START", 10000)
|
||||
portEnd := envInt("PORT_RANGE_END", 10100)
|
||||
|
||||
// Remote Traefik host config (SSH into the ingress host to manage routes).
|
||||
// Swarm manager SSH config (for updating service labels).
|
||||
traefikHost := envRequired("TRAEFIK_SSH_HOST")
|
||||
traefikUser := envOr("TRAEFIK_SSH_USER", "root")
|
||||
traefikKey := envRequired("TRAEFIK_SSH_KEY")
|
||||
traefikConfigDir := envOr("TRAEFIK_CONFIG_DIR", "/root/traefik/dynamic")
|
||||
serviceName := envOr("SWARM_SERVICE_NAME", "better-argo-tunnels_tunnel-server")
|
||||
entrypoint := envOr("TRAEFIK_ENTRYPOINT", "websecure")
|
||||
certResolver := envOr("TRAEFIK_CERT_RESOLVER", "letsencryptresolver")
|
||||
|
||||
// Load the SSH key for connecting to the Traefik host.
|
||||
// Load the SSH key for connecting to the Swarm manager.
|
||||
traefikSigner, err := server.LoadSigner(traefikKey)
|
||||
if err != nil {
|
||||
log.Fatalf("Failed to load Traefik SSH key: %v", err)
|
||||
}
|
||||
log.Printf("Loaded Traefik host SSH key")
|
||||
log.Printf("Loaded Swarm manager SSH key")
|
||||
|
||||
// Initialize port pool.
|
||||
pool := server.NewPortPool(portStart, portEnd)
|
||||
log.Printf("Port pool: %d-%d (%d ports)", portStart, portEnd, portEnd-portStart+1)
|
||||
|
||||
// Initialize Traefik label manager (remote SSH).
|
||||
// Initialize label manager (Swarm service update via SSH).
|
||||
labels, err := server.NewLabelManager(
|
||||
traefikHost, traefikUser, traefikSigner,
|
||||
traefikConfigDir, entrypoint, certResolver,
|
||||
serviceName, entrypoint, certResolver,
|
||||
)
|
||||
if err != nil {
|
||||
log.Fatalf("Failed to init label manager: %v", err)
|
||||
|
|
|
|||
|
|
@ -9,85 +9,126 @@ import (
|
|||
"golang.org/x/crypto/ssh"
|
||||
)
|
||||
|
||||
// LabelManager manages Traefik dynamic config on a remote host via SSH.
|
||||
// It SSHs into the Traefik host and writes per-tunnel YAML config files
|
||||
// into the Traefik file provider directory.
|
||||
// LabelManager manages Traefik routing labels on its own Swarm service
|
||||
// by SSHing into the Swarm manager and running docker service update.
|
||||
type LabelManager struct {
|
||||
mu sync.Mutex
|
||||
remoteHost string // e.g. "ingress.nixc.us" or "ingress.nixc.us:22"
|
||||
remoteUser string // SSH user on the Traefik host
|
||||
remoteHost string // Swarm manager, e.g. "ingress.nixc.us"
|
||||
remoteUser string // SSH user
|
||||
signer ssh.Signer
|
||||
configDir string // remote path where Traefik watches for file provider
|
||||
serviceName string // Swarm service name, e.g. "better-argo-tunnels_tunnel-server"
|
||||
entrypoint string // e.g. "websecure"
|
||||
certResolver string // e.g. "letsencryptresolver"
|
||||
labels map[string]bool // track which tunnel keys we've added
|
||||
}
|
||||
|
||||
// NewLabelManager creates a label manager that writes Traefik config via SSH.
|
||||
// NewLabelManager creates a label manager that updates Swarm service labels via SSH.
|
||||
func NewLabelManager(
|
||||
remoteHost, remoteUser string,
|
||||
signer ssh.Signer,
|
||||
configDir, entrypoint, certResolver string,
|
||||
serviceName, entrypoint, certResolver string,
|
||||
) (*LabelManager, error) {
|
||||
|
||||
lm := &LabelManager{
|
||||
remoteHost: remoteHost,
|
||||
remoteUser: remoteUser,
|
||||
signer: signer,
|
||||
configDir: configDir,
|
||||
serviceName: serviceName,
|
||||
entrypoint: entrypoint,
|
||||
certResolver: certResolver,
|
||||
labels: make(map[string]bool),
|
||||
}
|
||||
|
||||
// Ensure the remote config directory exists.
|
||||
if err := lm.runRemote(fmt.Sprintf("mkdir -p %s", configDir)); err != nil {
|
||||
return nil, fmt.Errorf("ensure remote config dir: %w", err)
|
||||
// Verify we can reach the Swarm manager and the service exists.
|
||||
cmd := fmt.Sprintf("docker service inspect --format '{{.Spec.Name}}' %s", serviceName)
|
||||
if err := lm.runRemote(cmd); err != nil {
|
||||
log.Printf("WARN: could not verify service %s (may not exist yet): %v", serviceName, err)
|
||||
} else {
|
||||
log.Printf("Verified Swarm service: %s", serviceName)
|
||||
}
|
||||
|
||||
log.Printf("Label manager ready (host=%s, dir=%s, ep=%s, resolver=%s)",
|
||||
remoteHost, configDir, entrypoint, certResolver)
|
||||
log.Printf("Label manager ready (host=%s, service=%s, ep=%s, resolver=%s)",
|
||||
remoteHost, serviceName, entrypoint, certResolver)
|
||||
|
||||
return lm, nil
|
||||
}
|
||||
|
||||
// Add writes a Traefik dynamic config file on the remote host for a tunnel.
|
||||
// Add adds Traefik routing labels to the Swarm service for a tunnel.
|
||||
func (lm *LabelManager) Add(tunKey, domain string, port int) error {
|
||||
lm.mu.Lock()
|
||||
defer lm.mu.Unlock()
|
||||
|
||||
routerName := fmt.Sprintf("tunnel-%s-router", tunKey)
|
||||
serviceName := fmt.Sprintf("tunnel-%s-service", tunKey)
|
||||
cfg := buildRouteConfig(routerName, serviceName, domain, port, lm.entrypoint, lm.certResolver)
|
||||
|
||||
remotePath := fmt.Sprintf("%s/tunnel-%s.yml", lm.configDir, tunKey)
|
||||
|
||||
// Write the config file via SSH using cat heredoc.
|
||||
cmd := fmt.Sprintf("cat > %s << 'TRAEFIKEOF'\n%sTRAEFIKEOF", remotePath, cfg)
|
||||
|
||||
if err := lm.runRemote(cmd); err != nil {
|
||||
return fmt.Errorf("write remote config %s: %w", remotePath, err)
|
||||
// Build the label-add flags for docker service update.
|
||||
labelArgs := []string{
|
||||
labelFlag(fmt.Sprintf("traefik.http.routers.%s.rule", routerName),
|
||||
fmt.Sprintf("Host(`%s`)", domain)),
|
||||
labelFlag(fmt.Sprintf("traefik.http.routers.%s.entrypoints", routerName),
|
||||
lm.entrypoint),
|
||||
labelFlag(fmt.Sprintf("traefik.http.routers.%s.tls", routerName),
|
||||
"true"),
|
||||
labelFlag(fmt.Sprintf("traefik.http.routers.%s.tls.certresolver", routerName),
|
||||
lm.certResolver),
|
||||
labelFlag(fmt.Sprintf("traefik.http.routers.%s.service", routerName),
|
||||
serviceName),
|
||||
labelFlag(fmt.Sprintf("traefik.http.services.%s.loadbalancer.server.port", serviceName),
|
||||
fmt.Sprintf("%d", port)),
|
||||
}
|
||||
|
||||
log.Printf("Wrote remote Traefik config: %s (domain=%s port=%d)", remotePath, domain, port)
|
||||
cmd := fmt.Sprintf("docker service update --label-add %s %s",
|
||||
strings.Join(labelArgs, " --label-add "), lm.serviceName)
|
||||
|
||||
if err := lm.runRemote(cmd); err != nil {
|
||||
return fmt.Errorf("add labels for %s: %w", domain, err)
|
||||
}
|
||||
|
||||
lm.labels[tunKey] = true
|
||||
log.Printf("Added Swarm labels: %s -> %s:%d", domain, lm.serviceName, port)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Remove deletes the Traefik dynamic config file on the remote host.
|
||||
// Remove removes Traefik routing labels from the Swarm service for a tunnel.
|
||||
func (lm *LabelManager) Remove(tunKey string) error {
|
||||
lm.mu.Lock()
|
||||
defer lm.mu.Unlock()
|
||||
|
||||
remotePath := fmt.Sprintf("%s/tunnel-%s.yml", lm.configDir, tunKey)
|
||||
cmd := fmt.Sprintf("rm -f %s", remotePath)
|
||||
|
||||
if err := lm.runRemote(cmd); err != nil {
|
||||
return fmt.Errorf("remove remote config %s: %w", remotePath, err)
|
||||
if !lm.labels[tunKey] {
|
||||
return nil // nothing to remove
|
||||
}
|
||||
|
||||
log.Printf("Removed remote Traefik config: %s", remotePath)
|
||||
routerName := fmt.Sprintf("tunnel-%s-router", tunKey)
|
||||
serviceName := fmt.Sprintf("tunnel-%s-service", tunKey)
|
||||
|
||||
// Build the label-rm flags.
|
||||
rmLabels := []string{
|
||||
fmt.Sprintf("traefik.http.routers.%s.rule", routerName),
|
||||
fmt.Sprintf("traefik.http.routers.%s.entrypoints", routerName),
|
||||
fmt.Sprintf("traefik.http.routers.%s.tls", routerName),
|
||||
fmt.Sprintf("traefik.http.routers.%s.tls.certresolver", routerName),
|
||||
fmt.Sprintf("traefik.http.routers.%s.service", routerName),
|
||||
fmt.Sprintf("traefik.http.services.%s.loadbalancer.server.port", serviceName),
|
||||
}
|
||||
|
||||
cmd := fmt.Sprintf("docker service update --label-rm %s %s",
|
||||
strings.Join(rmLabels, " --label-rm "), lm.serviceName)
|
||||
|
||||
if err := lm.runRemote(cmd); err != nil {
|
||||
return fmt.Errorf("remove labels for %s: %w", tunKey, err)
|
||||
}
|
||||
|
||||
delete(lm.labels, tunKey)
|
||||
log.Printf("Removed Swarm labels for tunnel: %s", tunKey)
|
||||
return nil
|
||||
}
|
||||
|
||||
// runRemote executes a command on the remote Traefik host via SSH.
|
||||
// labelFlag formats one KEY=VALUE label as a single-quoted shell word, suitable
// as the argument of a `docker service update --label-add` flag.
//
// The returned string is spliced into a command line that is handed to
// runRemote, so it must survive shell word-splitting intact. Inside single
// quotes every character is literal except the single quote itself, so each
// embedded ' is rewritten as '\'' (close the quote, emit an escaped quote,
// reopen the quote). Without this, a key or value containing a single quote
// would end the quoting early and the remainder would be interpreted by the
// shell.
func labelFlag(key, value string) string {
	escaped := strings.ReplaceAll(key+"="+value, "'", `'\''`)
	return "'" + escaped + "'"
}
|
||||
|
||||
// runRemote executes a command on the Swarm manager via SSH.
|
||||
func (lm *LabelManager) runRemote(cmd string) error {
|
||||
addr := lm.remoteHost
|
||||
if !strings.Contains(addr, ":") {
|
||||
|
|
@ -122,39 +163,6 @@ func (lm *LabelManager) runRemote(cmd string) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// buildRouteConfig generates Traefik dynamic config YAML for one tunnel.
|
||||
func buildRouteConfig(
|
||||
routerName, serviceName, domain string,
|
||||
port int,
|
||||
entrypoint, certResolver string,
|
||||
) string {
|
||||
var b strings.Builder
|
||||
|
||||
b.WriteString("# Auto-generated by tunnel-server. Do not edit.\n")
|
||||
b.WriteString("http:\n")
|
||||
|
||||
// Router
|
||||
b.WriteString(" routers:\n")
|
||||
b.WriteString(fmt.Sprintf(" %s:\n", routerName))
|
||||
b.WriteString(fmt.Sprintf(" rule: \"Host(`%s`)\"\n", domain))
|
||||
b.WriteString(" entryPoints:\n")
|
||||
b.WriteString(fmt.Sprintf(" - %s\n", entrypoint))
|
||||
b.WriteString(" tls:\n")
|
||||
b.WriteString(fmt.Sprintf(" certResolver: %s\n", certResolver))
|
||||
b.WriteString(fmt.Sprintf(" service: %s\n", serviceName))
|
||||
|
||||
// Service — points to the tunnel-server's allocated port.
|
||||
// The tunnel-server container is on the same network as Traefik,
|
||||
// so Traefik can reach it by container name or IP.
|
||||
b.WriteString(" services:\n")
|
||||
b.WriteString(fmt.Sprintf(" %s:\n", serviceName))
|
||||
b.WriteString(" loadBalancer:\n")
|
||||
b.WriteString(" servers:\n")
|
||||
b.WriteString(fmt.Sprintf(" - url: \"http://tunnel-server:%d\"\n", port))
|
||||
|
||||
return b.String()
|
||||
}
|
||||
|
||||
// Close is a no-op — SSH connections are opened/closed per operation.
|
||||
func (lm *LabelManager) Close() error {
|
||||
return nil
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ services:
|
|||
TRAEFIK_SSH_HOST: "ingress.nixc.us"
|
||||
TRAEFIK_SSH_USER: "root"
|
||||
TRAEFIK_SSH_KEY: "/run/secrets/traefik_deploy_key"
|
||||
TRAEFIK_CONFIG_DIR: "/root/traefik/dynamic"
|
||||
SWARM_SERVICE_NAME: "better-argo-tunnels_tunnel-server"
|
||||
TRAEFIK_ENTRYPOINT: "websecure"
|
||||
TRAEFIK_CERT_RESOLVER: "letsencryptresolver"
|
||||
HOSTNAME: "{{.Node.Hostname}}"
|
||||
|
|
@ -43,16 +43,24 @@ services:
|
|||
TASK_ID: "{{.Task.ID}}"
|
||||
ENVIRONMENT: "production"
|
||||
ports:
|
||||
- "2222:2222"
|
||||
- "10000-10100:10000-10100"
|
||||
- target: 2222
|
||||
published: 2222
|
||||
protocol: tcp
|
||||
mode: host
|
||||
deploy:
|
||||
replicas: 1
|
||||
placement:
|
||||
constraints:
|
||||
- node.hostname == macmini1
|
||||
- node.hostname == ingress.nixc.us
|
||||
labels:
|
||||
traefik.enable: "true"
|
||||
traefik.tcp.routers.tunnel-ssh-router.rule: "HostSNI(`*`)"
|
||||
traefik.tcp.routers.tunnel-ssh-router.entrypoints: "ssh"
|
||||
traefik.tcp.services.tunnel-ssh-service.loadbalancer.server.port: "2222"
|
||||
traefik.docker.network: "traefik"
|
||||
# Dynamic tunnel labels are added at runtime via docker service update.
|
||||
# The base labels below just enable Traefik discovery.
|
||||
update_config:
|
||||
order: stop-first
|
||||
failure_action: rollback
|
||||
delay: 0s
|
||||
parallelism: 1
|
||||
restart_policy:
|
||||
condition: on-failure
|
||||
|
|
|
|||
Loading…
Reference in New Issue