#!/bin/bash

################################################################################
# WOODPECKER CI PRODUCTION DEPLOYMENT SCRIPT
################################################################################
#
# ⚠️ WARNING: THIS SCRIPT IS EXCLUSIVELY FOR WOODPECKER CI USE
#
# This script is designed to run within the Woodpecker CI environment with
# specific environment variables and Docker socket access.
#
# 🚫 DO NOT RUN THIS ON A DEVELOPER WORKSTATION
# 🚫 This will attempt to remove production Docker stacks and secrets
# 🚫 This requires access to production Docker swarm manager nodes
#
# This script handles:
# - Production stack removal and cleanup
# - Docker secrets recreation with fresh values
# - New stack deployment with verification
# - Health checking and deployment validation
# - Rollback reporting on failure (automatic rollback is not implemented yet)
# - Concurrent execution prevention
#
################################################################################

# Strict mode: abort on command failure, on use of an unset variable, and when
# any stage of a pipeline fails.
set -euo pipefail

# Configuration
# SCRIPT_DIR is assigned first and frozen afterwards: `readonly VAR="$(cmd)"`
# would report the status of `readonly` and hide a failing `cd`/`pwd`.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
readonly SCRIPT_DIR
readonly LOCK_FILE="/tmp/authelia-deploy.lock"
readonly MAX_RETRIES=3
readonly RETRY_DELAY=10
readonly DEPLOYMENT_TIMEOUT=300 # 5 minutes
readonly HEALTH_CHECK_TIMEOUT=120 # 2 minutes
readonly MIN_DISK_SPACE_MB=1000

# Color codes for output (stored as literal escape text; the logging helpers
# are responsible for interpreting them)
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly PURPLE='\033[0;35m'
readonly NC='\033[0m' # No Color

# Global variables for cleanup
DEPLOYMENT_STARTED=false # set to true by deploy_stack
OLD_IMAGE_ID=""          # filled by get_current_image_id, read on rollback
ROLLBACK_NEEDED=false    # read by the exit handler to decide on rollback

# Logging functions

#######################################
# Print a timestamped informational message in blue.
# Globals:   BLUE, NC (read)
# Arguments: $1 - message text
# Outputs:   one line on stdout
#######################################
log() {
  # %b interprets the escape sequences of the color codes only; the message
  # goes through %s so backslashes in it are printed literally.
  printf '%b[%s] %s%b\n' "$BLUE" "$(date +'%Y-%m-%d %H:%M:%S')" "${1-}" "$NC"
}

#######################################
# Print an error message in red.
# Globals:   RED, NC (read)
# Arguments: $1 - message text
# Outputs:   one line on stderr, so diagnostics never mix with data on stdout
#######################################
error() {
  # Message is passed through %s so it is printed literally; only the color
  # codes are escape-interpreted via %b.
  printf '%b[ERROR] %s%b\n' "$RED" "${1-}" "$NC" >&2
}

#######################################
# Print a success message in green.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message text
# Outputs:   one line on stdout
#######################################
success() {
  # %b for the color codes, %s for the message so it is printed literally.
  printf '%b[SUCCESS] %s%b\n' "$GREEN" "${1-}" "$NC"
}

#######################################
# Print a warning message in yellow.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - message text
# Outputs:   one line on stderr
#######################################
warning() {
  # %b for the color codes, %s for the message so it is printed literally.
  printf '%b[WARNING] %s%b\n' "$YELLOW" "${1-}" "$NC" >&2
}

#######################################
# Print a debug message in purple.
# Globals:   PURPLE, NC (read)
# Arguments: $1 - message text
# Outputs:   one line on stdout
#######################################
debug() {
  # %b for the color codes, %s for the message so it is printed literally.
  printf '%b[DEBUG] %s%b\n' "$PURPLE" "${1-}" "$NC"
}

#######################################
# Cleanup function - runs on script exit.
# Releases the deployment lock (only if this process owns it), triggers the
# rollback handler after a failed deployment, and exits with the status the
# script was terminating with.
# Globals:   LOCK_FILE, ROLLBACK_NEEDED (read)
# Arguments: none
# Returns:   never returns; exits with the captured exit status
#######################################
cleanup() {
  # Must be the first statement so $? still holds the terminating status.
  local exit_code=$?
  local lock_owner=""

  # Disarm the traps before doing anything else: the `exit` at the end would
  # otherwise fire the EXIT trap again when this handler was entered through
  # a signal, running the cleanup (and the rollback) twice.
  trap - EXIT INT TERM

  if [[ -f "$LOCK_FILE" ]]; then
    # The lock file stores the PID of the deployment that created it. Only
    # remove it when it is ours; otherwise a run that was rejected because
    # another deployment holds the lock would delete that deployment's lock.
    lock_owner="$(cat -- "$LOCK_FILE" 2>/dev/null || true)"
    if [[ "$lock_owner" == "$$" ]]; then
      debug "Removing deployment lock file"
      rm -f -- "$LOCK_FILE"
    else
      debug "Lock file is owned by another process (${lock_owner:-unknown}); leaving it in place"
    fi
  fi

  if [[ $exit_code -ne 0 && "$ROLLBACK_NEEDED" == "true" ]]; then
    error "Deployment failed - attempting rollback..."
    attempt_rollback
  fi

  debug "Cleanup completed with exit code: $exit_code"
  exit "$exit_code"
}

# Set up cleanup trap
# Only EXIT runs the handler. INT/TERM are converted into a regular exit with
# the conventional 128+signal status, so the handler runs exactly once and
# sees a non-zero exit code when the deployment is interrupted.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

#######################################
# Retry function for operations that might fail transiently.
# Globals:   MAX_RETRIES, RETRY_DELAY (read)
# Arguments: $1 - command line, executed with eval on every attempt
#            $2 - human-readable description used in log messages
# Returns:   0 as soon as one attempt succeeds, 1 once all attempts failed
# NOTE(review): $1 is eval'd; callers must only pass trusted command strings.
#######################################
retry_command() {
  local cmd="$1"
  local description="$2"
  local try

  for ((try = 1; try <= MAX_RETRIES; try++)); do
    log "Attempt $try/$MAX_RETRIES: $description"

    if eval "$cmd"; then
      success "$description completed successfully"
      return 0
    fi

    # Last attempt used up: report and give up without sleeping again.
    if [[ $try -eq $MAX_RETRIES ]]; then
      error "$description failed after $MAX_RETRIES attempts"
      return 1
    fi

    warning "$description failed, retrying in ${RETRY_DELAY}s..."
    sleep "$RETRY_DELAY"
  done
}

#######################################
# Pre-flight checks.
# Acquires the deployment lock and validates the environment before anything
# in production is touched.
# Globals:   LOCK_FILE, MIN_DISK_SPACE_MB, CI_REPO_NAME and the required
#            secret/registry environment variables (read)
# Arguments: none
# Returns:   0 when every check passed; exits the script with 1 otherwise
#######################################
pre_flight_checks() {
  log "Running pre-flight checks..."

  # Check if another deployment is running and create the lock file in one
  # atomic step: with noclobber the redirection fails when the file already
  # exists, so two concurrent runs cannot both pass a separate existence test.
  # The subshell keeps noclobber from leaking into the rest of the script.
  if ! (set -o noclobber; printf '%s\n' "$$" > "$LOCK_FILE") 2>/dev/null; then
    if [[ -e "$LOCK_FILE" ]]; then
      error "Another deployment is already running (lock file exists: $LOCK_FILE)"
      error "If you're sure no other deployment is running, remove the lock file manually"
    else
      error "Unable to create deployment lock file: $LOCK_FILE"
    fi
    exit 1
  fi
  debug "Created deployment lock file"

  # Verify we're running in CI environment
  if [[ -z "${CI_REPO_NAME:-}" ]]; then
    error "This script must only be run in Woodpecker CI environment!"
    error "Missing CI_REPO_NAME environment variable"
    exit 1
  fi

  # Check Docker daemon is responsive
  if ! docker info >/dev/null 2>&1; then
    error "Docker daemon is not responsive"
    exit 1
  fi

  # Check available disk space. A failing df is reported explicitly instead
  # of letting `set -e` abort the script without any message.
  local available_space
  if ! available_space=$(df /var/lib/docker --output=avail --block-size=1M | tail -n1 | tr -d ' '); then
    error "Unable to determine available disk space for /var/lib/docker"
    exit 1
  fi
  if [[ ! "$available_space" =~ ^[0-9]+$ ]]; then
    error "Unexpected disk space value reported by df: '${available_space}'"
    exit 1
  fi
  if [[ $available_space -lt $MIN_DISK_SPACE_MB ]]; then
    error "Insufficient disk space: ${available_space}MB available, ${MIN_DISK_SPACE_MB}MB required"
    exit 1
  fi

  # Verify required environment variables
  local required_vars=(
    "REGISTRY_USER" "REGISTRY_PASSWORD" "CI_REPO_NAME"
    "AUTHENTICATION_BACKEND_LDAP_PASSWORD" "IDENTITY_VALIDATION_RESET_PASSWORD_JWT_SECRET"
    "STORAGE_ENCRYPTION_KEY" "SESSION_SECRET" "NOTIFIER_SMTP_PASSWORD"
    "IDENTITY_PROVIDERS_OIDC_HMAC_SECRET" "IDENTITY_PROVIDERS_OIDC_ISSUER_PRIVATE_KEY"
    "IDENTITY_PROVIDERS_OIDC_JWKS_KEY" "CLIENT_SECRET_HEADSCALE" "CLIENT_SECRET_HEADADMIN"
  )

  local var
  for var in "${required_vars[@]}"; do
    # ${!var} is indirect expansion: the value of the variable named by $var.
    if [[ -z "${!var:-}" ]]; then
      error "Required environment variable $var is not set"
      exit 1
    fi
  done

  # Check if stack file exists
  if [[ ! -f "./stack.production.yml" ]]; then
    error "Production stack file not found: ./stack.production.yml"
    exit 1
  fi

  success "Pre-flight checks completed"
}

#######################################
# Get current image ID for rollback purposes.
# Records the image of the first listed task whose image name contains
# "authelia" in OLD_IMAGE_ID. Does nothing when the stack cannot be listed.
# Globals:   CI_REPO_NAME (read), OLD_IMAGE_ID (written)
# Arguments: none
# Returns:   0
#######################################
get_current_image_id() {
  if ! docker stack ps "${CI_REPO_NAME}" >/dev/null 2>&1; then
    return 0
  fi

  # A plain Go template (no "table" directive) prints one image per line with
  # no header row. `grep -m1` stops at the first match; `|| true` covers both
  # "no match" and a pipefail status caused by the early exit.
  OLD_IMAGE_ID=$(docker stack ps "${CI_REPO_NAME}" --format '{{.Image}}' | grep -m1 authelia || true)

  if [[ -n "$OLD_IMAGE_ID" ]]; then
    debug "Current image for rollback: $OLD_IMAGE_ID"
  fi
}

#######################################
# Rollback function.
# Currently only reports: no automatic rollback is performed. When a previous
# image is known it is printed so an operator can restore it manually.
# Globals:   OLD_IMAGE_ID (read)
# Arguments: none
#######################################
attempt_rollback() {
  # "IMAGE" is the column header of a table-formatted listing, not an image.
  if [[ -z "$OLD_IMAGE_ID" || "$OLD_IMAGE_ID" == "IMAGE" ]]; then
    error "No previous image information available for rollback"
    return
  fi

  warning "Attempting rollback to previous image: $OLD_IMAGE_ID"

  # This would require a more complex rollback mechanism
  # For now, just log the attempt
  error "Rollback mechanism not yet implemented"
  error "Manual intervention required"
  error "Previous image was: $OLD_IMAGE_ID"
}

#######################################
# Enhanced Docker registry login with retries.
# Globals:   REGISTRY_USER, REGISTRY_PASSWORD (read)
# Arguments: none
# Returns:   status of retry_command (0 on successful login)
#######################################
docker_registry_login() {
  log "Logging into Docker registry"

  # Single-quoted on purpose: retry_command evals this string, so the
  # variables are expanded at eval time inside double quotes. Splicing the
  # values into the string would break (or execute injected shell code) as
  # soon as the password or user name contains a single quote.
  # shellcheck disable=SC2016
  local login_cmd='printf "%s\n" "${REGISTRY_PASSWORD}" | docker login -u "${REGISTRY_USER}" --password-stdin git.nixc.us'
  retry_command "$login_cmd" "Docker registry login"
}

#######################################
# Wait for stack removal with timeout.
# Polls the stack list every 5 seconds until the stack named CI_REPO_NAME is
# no longer listed.
# Globals:   CI_REPO_NAME, DEPLOYMENT_TIMEOUT (read)
# Arguments: none
# Returns:   0 once the stack is gone, 1 on timeout or if stacks cannot be
#            listed
#######################################
wait_for_stack_removal() {
  log "Verifying stack removal completed"
  local timeout=$((DEPLOYMENT_TIMEOUT))
  local elapsed=0
  local stacks

  while true; do
    # A listing failure must not be mistaken for "stack is gone".
    if ! stacks=$(docker stack ls --format '{{.Name}}'); then
      error "Unable to list Docker stacks"
      return 1
    fi

    # -F -x: literal, whole-line match. A substring match would keep waiting
    # for e.g. "authelia-staging" when the stack being removed is "authelia".
    if ! grep -Fxq -- "${CI_REPO_NAME}" <<<"$stacks"; then
      break
    fi

    if [[ $elapsed -ge $timeout ]]; then
      error "Stack removal timeout after ${timeout} seconds"
      return 1
    fi

    log "Stack still exists, waiting... (${elapsed}s/${timeout}s)"
    sleep 5
    elapsed=$((elapsed + 5))
  done

  success "Stack removal completed in ${elapsed} seconds"
}

#######################################
# Enhanced secret management with validation.
# Removes the existing Docker secrets, recreates each one from the environment
# variable of the same name and verifies that all of them are listed.
# Globals:   one environment variable per secret name (read)
# Arguments: none
# Returns:   0 when all secrets were created and verified, 1 otherwise
#######################################
manage_secrets() {
  log "Managing Docker secrets"

  local -a SECRETS=(
    "AUTHENTICATION_BACKEND_LDAP_PASSWORD"
    "IDENTITY_VALIDATION_RESET_PASSWORD_JWT_SECRET"
    "STORAGE_ENCRYPTION_KEY"
    "SESSION_SECRET"
    "NOTIFIER_SMTP_PASSWORD"
    "IDENTITY_PROVIDERS_OIDC_HMAC_SECRET"
    "IDENTITY_PROVIDERS_OIDC_ISSUER_PRIVATE_KEY"
    "IDENTITY_PROVIDERS_OIDC_JWKS_KEY"
    "CLIENT_SECRET_HEADSCALE"
    "CLIENT_SECRET_HEADADMIN"
  )
  local secret
  local existing

  # Remove old secrets. The list is fetched once and matched literally,
  # whole-line, from a variable.
  log "Removing old Docker secrets"
  if ! existing=$(docker secret ls --format '{{.Name}}'); then
    error "Unable to list Docker secrets"
    return 1
  fi
  for secret in "${SECRETS[@]}"; do
    if grep -Fxq -- "$secret" <<<"$existing"; then
      # Removal stays best-effort, but a failure is reported instead of being
      # logged as a successful removal.
      if docker secret rm "$secret"; then
        debug "Removed secret: $secret"
      else
        warning "Could not remove secret: $secret"
      fi
    else
      debug "Secret $secret did not exist"
    fi
  done

  # Create new secrets with validation
  log "Creating new Docker secrets with updated values"
  for secret in "${SECRETS[@]}"; do
    # ${!secret} is indirect expansion: the value of the environment variable
    # that carries the same name as the secret.
    if [[ -z "${!secret:-}" ]]; then
      error "Environment variable $secret is not set!"
      return 1
    fi
    # printf '%s\n' emits the same bytes echo would for ordinary values but
    # never treats a value such as "-n" or "-e" as an option.
    if printf '%s\n' "${!secret}" | docker secret create "$secret" -; then
      success "Created secret: $secret"
    else
      error "Failed to create secret: $secret"
      return 1
    fi
  done

  # Verify all secrets were created
  log "Verifying secret creation"
  if ! existing=$(docker secret ls --format '{{.Name}}'); then
    error "Unable to list Docker secrets"
    return 1
  fi
  for secret in "${SECRETS[@]}"; do
    if ! grep -Fxq -- "$secret" <<<"$existing"; then
      error "Secret verification failed: $secret was not created"
      return 1
    fi
  done

  success "All secrets created and verified"
}

#######################################
# Enhanced deployment with better error handling.
# Deploys ./stack.production.yml as stack CI_REPO_NAME through retry_command.
# Globals:   CI_REPO_NAME (read), ROLLBACK_NEEDED, DEPLOYMENT_STARTED (written)
# Arguments: none
# Returns:   0 when the deploy command succeeded, 1 after all retries failed
#######################################
deploy_stack() {
  log "Deploying new stack with fresh secrets"
  # From this point a failure leaves production in a changed state, so the
  # exit handler should report rollback information.
  ROLLBACK_NEEDED=true
  DEPLOYMENT_STARTED=true

  # Single-quoted on purpose: retry_command evals the string, so the stack
  # name is expanded at eval time inside double quotes rather than being
  # spliced into the command text.
  # shellcheck disable=SC2016
  local deploy_cmd='docker stack deploy --with-registry-auth -c ./stack.production.yml "${CI_REPO_NAME}"'

  if ! retry_command "$deploy_cmd" "Stack deployment"; then
    error "Stack deployment failed"
    return 1
  fi

  success "Stack deployment command completed"
}

#######################################
# Print the current state (e.g. "Running 10 seconds ago") of every task of
# one service, one per line, in the order docker lists them.
# Arguments: $1 - stack name, $2 - full service name (<stack>_<service>)
#######################################
_service_task_states() {
  local stack="$1"
  local service="$2"
  # Asking for the CurrentState column explicitly avoids matching the word
  # "Running" in the DESIRED STATE column of the default table output.
  docker stack ps "$stack" --format '{{.Name}}|{{.CurrentState}}' \
    | awk -F'|' -v svc="$service" 'index($1, svc) { print $2 }'
}

#######################################
# Enhanced health checking with multiple validation methods.
# Polls the authelia service of the deployed stack every 5 seconds until one
# of its tasks is running, is still running 5 seconds later, and no task of
# the service is in the Failed state.
# Globals:   CI_REPO_NAME, HEALTH_CHECK_TIMEOUT (read),
#            ROLLBACK_NEEDED (set to false on success)
# Arguments: none
# Returns:   0 when the service is healthy, 1 on timeout (after printing
#            stack status, service logs and service details)
#######################################
comprehensive_health_check() {
  log "Starting comprehensive health check (${HEALTH_CHECK_TIMEOUT}s timeout)"
  local start_time
  start_time=$(date +%s)
  local timeout=$HEALTH_CHECK_TIMEOUT
  # Same service name the debugging section below inspects.
  local service="${CI_REPO_NAME}_authelia"

  # Wait for services to initialize
  log "Waiting for services to initialize (30 seconds)"
  sleep 30

  # Check deployment status
  log "Checking deployment status"
  docker stack ps "${CI_REPO_NAME}"

  # Health check loop with multiple validation methods
  local check_count=0
  local max_checks=$((timeout / 5))
  local current_time elapsed states failed_count

  while [[ $check_count -lt $max_checks ]]; do
    current_time=$(date +%s)
    elapsed=$((current_time - start_time))

    log "Health check attempt $((check_count + 1))/${max_checks} (${elapsed}s elapsed)"

    # Check if authelia service is running
    states=$(_service_task_states "${CI_REPO_NAME}" "$service" || true)
    if grep -q '^Running' <<<"$states"; then
      success "✅ Authelia service is running!"

      # Additional verification checks
      log "Performing additional health verification..."
      sleep 5

      # Check service is stable (not restarting): the first listed task must
      # still be running after the pause.
      states=$(_service_task_states "${CI_REPO_NAME}" "$service" || true)
      if [[ "$states" == Running* ]]; then
        # Check if there are any failed instances. grep -c prints "0" AND
        # exits non-zero when nothing matches, so the fallback must not print
        # a second value.
        failed_count=$(grep -c '^Failed' <<<"$states" || true)

        if [[ $failed_count -eq 0 ]]; then
          success "🎉 Production deployment completed successfully!"
          success "Authelia service is healthy and stable"
          success "Total deployment time: ${elapsed} seconds"
          ROLLBACK_NEEDED=false
          return 0
        else
          warning "Found $failed_count failed service instances, continuing health checks..."
        fi
      fi
    fi

    if [[ $elapsed -ge $timeout ]]; then
      break
    fi

    log "Waiting for authelia service... (${elapsed}s/${timeout}s)"
    sleep 5
    # Plain assignment: ((check_count++)) evaluates to 0 on the first pass,
    # returns status 1 and would abort the whole script under `set -e`.
    check_count=$((check_count + 1))
  done

  # Health check failed
  error "❌ Health check failed after ${timeout} seconds"
  error "Deployment verification failed"

  # Show detailed debugging information
  error "=== DEBUGGING INFORMATION ==="
  error "Stack status:"
  docker stack ps "${CI_REPO_NAME}" || true

  error "Authelia service logs (last 30 lines):"
  docker service logs "${CI_REPO_NAME}_authelia" --tail 30 || true

  error "Docker service inspect:"
  docker service inspect "${CI_REPO_NAME}_authelia" --pretty || true

  return 1
}

#######################################
# Main deployment function.
# Runs the deployment steps in order: pre-flight checks, capture of the
# current image, registry login, stack removal, secret recreation, stack
# deployment and health check.
# Globals:   CI_REPO_NAME (read), ROLLBACK_NEEDED (written)
# Arguments: none (any arguments are ignored)
#######################################
main() {
  # The default keeps `set -u` from aborting here with a bare "unbound
  # variable" error before the pre-flight checks can report the missing
  # CI environment properly.
  log "🚀 Starting production deployment for ${CI_REPO_NAME:-<unset>}"

  # Pre-flight checks
  pre_flight_checks

  # Get current state for potential rollback
  get_current_image_id

  # Step 1: Docker registry login
  docker_registry_login

  # Step 2: Remove old stack to release secrets
  log "Removing old stack to release secrets"
  # Production is modified from here on; flag it so a failure in any later
  # step makes the exit handler report the previous image.
  ROLLBACK_NEEDED=true
  # Best effort: the stack may not exist on a first deployment.
  docker stack rm "${CI_REPO_NAME}" || true

  # Step 3: Wait for complete stack removal with timeout
  log "Waiting for complete stack removal (30 seconds minimum)"
  sleep 30
  wait_for_stack_removal

  # Step 4 & 5: Manage secrets (remove old, create new)
  manage_secrets

  # Step 6: Deploy new stack
  deploy_stack

  # Step 7-9: Comprehensive health checking
  comprehensive_health_check

  success "🎉 Production deployment completed successfully!"
}

# Run main function
main "$@"