#!/bin/bash
################################################################################
# WOODPECKER CI PRODUCTION DEPLOYMENT SCRIPT
################################################################################
#
# ⚠️  WARNING: THIS SCRIPT IS EXCLUSIVELY FOR WOODPECKER CI USE
#
# This script is designed to run within the Woodpecker CI environment with
# specific environment variables and Docker socket access.
#
# 🚫 DO NOT RUN THIS ON A DEVELOPER WORKSTATION
# 🚫 This will attempt to remove production Docker stacks and secrets
# 🚫 This requires access to production Docker swarm manager nodes
#
# This script handles:
# - Production stack removal and cleanup
# - Docker secrets recreation with fresh values
# - New stack deployment with verification
# - Health checking and deployment validation
# - Rollback reporting on failure (automatic rollback is NOT implemented yet)
# - Concurrent execution prevention (lock file)
#
# Required environment variables:
#   CI_REPO_NAME, REGISTRY_USER, REGISTRY_PASSWORD, plus one variable per
#   Docker secret listed in SECRET_NAMES below.
#
################################################################################

set -euo pipefail

# ------------------------------------------------------------------------------
# Configuration
# ------------------------------------------------------------------------------
# Declared separately from the assignment so a failing `cd`/`pwd` is not masked.
# shellcheck disable=SC2034  # kept for parity with the original script
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
readonly SCRIPT_DIR
readonly LOCK_FILE="/tmp/authelia-deploy.lock"
readonly MAX_RETRIES=3
readonly RETRY_DELAY=10
readonly DEPLOYMENT_TIMEOUT=300     # 5 minutes
readonly HEALTH_CHECK_TIMEOUT=120   # 2 minutes
readonly MIN_DISK_SPACE_MB=1000
readonly REGISTRY_HOST="git.nixc.us"
readonly STACK_FILE="./stack.production.yml"
readonly SERVICE_NAME="authelia"
readonly DOCKER_DATA_DIR="/var/lib/docker"

# Docker secrets managed by this script. Each secret is created from the
# environment variable of the same name.
readonly -a SECRET_NAMES=(
  "AUTHENTICATION_BACKEND_LDAP_PASSWORD"
  "IDENTITY_VALIDATION_RESET_PASSWORD_JWT_SECRET"
  "STORAGE_ENCRYPTION_KEY"
  "SESSION_SECRET"
  "NOTIFIER_SMTP_PASSWORD"
  "IDENTITY_PROVIDERS_OIDC_HMAC_SECRET"
  "IDENTITY_PROVIDERS_OIDC_ISSUER_PRIVATE_KEY"
  "IDENTITY_PROVIDERS_OIDC_JWKS_KEY"
  "CLIENT_SECRET_HEADSCALE"
  "CLIENT_SECRET_HEADADMIN"
)

# Color codes for output
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly PURPLE='\033[0;35m'
readonly NC='\033[0m' # No Color

# Global state consulted by cleanup()
# shellcheck disable=SC2034  # set for diagnostics; not read by this script
DEPLOYMENT_STARTED=false
OLD_IMAGE_ID=""
ROLLBACK_NEEDED=false
LOCK_ACQUIRED=false   # true only when THIS process created the lock file

# ------------------------------------------------------------------------------
# Logging helpers. $1 is the message. Errors and warnings go to stderr.
# printf '%s' is used for the message so backslashes in it are printed verbatim.
# ------------------------------------------------------------------------------
log() {
  printf '%b[%s] %s%b\n' "$BLUE" "$(date +'%Y-%m-%d %H:%M:%S')" "$1" "$NC"
}

error() {
  printf '%b[ERROR] %s%b\n' "$RED" "$1" "$NC" >&2
}

success() {
  printf '%b[SUCCESS] %s%b\n' "$GREEN" "$1" "$NC"
}

warning() {
  printf '%b[WARNING] %s%b\n' "$YELLOW" "$1" "$NC" >&2
}

debug() {
  printf '%b[DEBUG] %s%b\n' "$PURPLE" "$1" "$NC"
}

#######################################
# Exit handler: releases the lock (only if we own it) and reports rollback
# information when a started deployment failed.
# Globals: LOCK_FILE, LOCK_ACQUIRED, ROLLBACK_NEEDED (read)
# Returns: exits with the status the script was terminating with
#######################################
cleanup() {
  local exit_code=$?

  # Run exactly once, and never let errexit abort the cleanup half-way.
  trap - EXIT INT TERM
  set +e

  # Only remove a lock this process created; otherwise a rejected second run
  # would delete the lock of the deployment that is still in progress.
  if [[ "$LOCK_ACQUIRED" == "true" ]]; then
    debug "Removing deployment lock file"
    rm -f -- "$LOCK_FILE"
  fi

  if [[ $exit_code -ne 0 && "$ROLLBACK_NEEDED" == "true" ]]; then
    error "Deployment failed - attempting rollback..."
    attempt_rollback
  fi

  debug "Cleanup completed with exit code: $exit_code"
  exit "$exit_code"
}

# cleanup runs once via EXIT; signals are mapped to the conventional
# 128+signal exit codes so an interrupted run is never reported as success.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

#######################################
# Run a command up to MAX_RETRIES times, sleeping RETRY_DELAY between tries.
# Arguments: $1 - command string (evaluated with eval)
#            $2 - human-readable description used in log messages
# Returns:   0 on first success, 1 when every attempt failed
# NOTE(review): $1 is passed to eval. Only pass trusted, constant strings
# (e.g. a function name); never interpolate secrets or external input.
#######################################
retry_command() {
  local cmd="$1"
  local description="$2"
  local attempt

  for (( attempt = 1; attempt <= MAX_RETRIES; attempt++ )); do
    log "Attempt $attempt/$MAX_RETRIES: $description"
    if eval "$cmd"; then
      success "$description completed successfully"
      return 0
    fi
    if (( attempt < MAX_RETRIES )); then
      warning "$description failed, retrying in ${RETRY_DELAY}s..."
      sleep "$RETRY_DELAY"
    fi
  done

  error "$description failed after $MAX_RETRIES attempts"
  return 1
}

#######################################
# Atomically create the lock file containing our PID.
# noclobber makes the redirection fail if the file already exists, which
# closes the window between "check" and "create".
# Returns: 0 if the lock was created by this process, non-zero otherwise
#######################################
acquire_lock() {
  if ( set -o noclobber; printf '%s\n' "$$" > "$LOCK_FILE" ) 2>/dev/null; then
    LOCK_ACQUIRED=true
    return 0
  fi
  return 1
}

#######################################
# Validate the environment before touching production.
# Exits the script with status 1 on the first failed check.
#######################################
pre_flight_checks() {
  log "Running pre-flight checks..."

  # Check if another deployment is running / create lock file
  if ! acquire_lock; then
    if [[ -e "$LOCK_FILE" ]]; then
      error "Another deployment is already running (lock file exists: $LOCK_FILE)"
      error "If you're sure no other deployment is running, remove the lock file manually"
    else
      error "Unable to create lock file: $LOCK_FILE"
    fi
    exit 1
  fi
  debug "Created deployment lock file"

  # Verify we're running in CI environment
  if [[ -z "${CI_REPO_NAME:-}" ]]; then
    error "This script must only be run in Woodpecker CI environment!"
    error "Missing CI_REPO_NAME environment variable"
    exit 1
  fi

  # Check Docker daemon is responsive
  if ! docker info >/dev/null 2>&1; then
    error "Docker daemon is not responsive"
    exit 1
  fi

  # Check available disk space
  local available_space
  if ! available_space=$(df --output=avail --block-size=1M "$DOCKER_DATA_DIR" \
      | tail -n1 | tr -d ' ') \
      || [[ ! "$available_space" =~ ^[0-9]+$ ]]; then
    error "Unable to determine available disk space for ${DOCKER_DATA_DIR}"
    exit 1
  fi
  if (( available_space < MIN_DISK_SPACE_MB )); then
    error "Insufficient disk space: ${available_space}MB available, ${MIN_DISK_SPACE_MB}MB required"
    exit 1
  fi

  # Verify required environment variables
  local -a required_vars=(
    "REGISTRY_USER"
    "REGISTRY_PASSWORD"
    "CI_REPO_NAME"
    "${SECRET_NAMES[@]}"
  )
  local var
  for var in "${required_vars[@]}"; do
    if [[ -z "${!var:-}" ]]; then
      error "Required environment variable $var is not set"
      exit 1
    fi
  done

  # Check if stack file exists
  if [[ ! -f "$STACK_FILE" ]]; then
    error "Production stack file not found: ${STACK_FILE}"
    exit 1
  fi

  success "Pre-flight checks completed"
}

#######################################
# Record the image of the currently deployed Authelia task (if any) in
# OLD_IMAGE_ID so it can be reported if the deployment fails.
#######################################
get_current_image_id() {
  local images
  # `docker stack ps` fails when the stack does not exist; that is not an error.
  if ! images=$(docker stack ps "$CI_REPO_NAME" --format '{{.Image}}' 2>/dev/null); then
    return 0
  fi

  OLD_IMAGE_ID=$(grep -m1 -- "authelia" <<<"$images" || true)
  if [[ -n "$OLD_IMAGE_ID" ]]; then
    debug "Current image for rollback: $OLD_IMAGE_ID"
  fi
}

#######################################
# Report rollback information. Automatic rollback is not implemented; this
# only tells the operator which image was previously deployed.
#######################################
attempt_rollback() {
  if [[ -n "$OLD_IMAGE_ID" && "$OLD_IMAGE_ID" != "IMAGE" ]]; then
    warning "Attempting rollback to previous image: $OLD_IMAGE_ID"
    # This would require a more complex rollback mechanism
    # For now, just log the attempt
    error "Rollback mechanism not yet implemented"
    error "Manual intervention required"
    error "Previous image was: $OLD_IMAGE_ID"
  else
    error "No previous image information available for rollback"
  fi
}

# Single registry login attempt. The password is fed via stdin and never
# becomes part of a command string or argv.
_registry_login_once() {
  printf '%s' "$REGISTRY_PASSWORD" \
    | docker login -u "$REGISTRY_USER" --password-stdin "$REGISTRY_HOST"
}

#######################################
# Log in to the Docker registry, retrying transient failures.
# Returns: 0 on success, 1 when all attempts failed
#######################################
docker_registry_login() {
  log "Logging into Docker registry"
  retry_command "_registry_login_once" "Docker registry login"
}

#######################################
# Block until the stack named CI_REPO_NAME no longer appears in
# `docker stack ls`, polling every 5 seconds.
# Returns: 0 when the stack is gone, 1 on timeout or if stacks cannot be listed
#######################################
wait_for_stack_removal() {
  log "Verifying stack removal completed"

  local timeout=$DEPLOYMENT_TIMEOUT
  local elapsed=0
  local stacks

  while true; do
    # Capture first, then match: avoids SIGPIPE false negatives under pipefail
    # and lets us tell "docker failed" apart from "stack is gone".
    if ! stacks=$(docker stack ls --format '{{.Name}}'); then
      error "Unable to list Docker stacks"
      return 1
    fi
    # Exact, literal match so stacks that merely contain the name do not count.
    if ! grep -Fxq -- "$CI_REPO_NAME" <<<"$stacks"; then
      break
    fi

    if (( elapsed >= timeout )); then
      error "Stack removal timeout after ${timeout} seconds"
      return 1
    fi

    log "Stack still exists, waiting... (${elapsed}s/${timeout}s)"
    sleep 5
    elapsed=$(( elapsed + 5 ))
  done

  success "Stack removal completed in ${elapsed} seconds"
}

#######################################
# Recreate every Docker secret in SECRET_NAMES from the environment variable
# of the same name: validate inputs, remove old secrets, create new ones,
# then verify they exist.
# Returns: 0 on success, 1 on any failure
#######################################
manage_secrets() {
  log "Managing Docker secrets"

  local secret existing

  # Validate every value up front so we never delete secrets we cannot recreate.
  for secret in "${SECRET_NAMES[@]}"; do
    if [[ -z "${!secret:-}" ]]; then
      error "Environment variable $secret is not set!"
      return 1
    fi
  done

  # Remove old secrets
  log "Removing old Docker secrets"
  if ! existing=$(docker secret ls --format '{{.Name}}'); then
    error "Unable to list Docker secrets"
    return 1
  fi
  for secret in "${SECRET_NAMES[@]}"; do
    if grep -Fxq -- "$secret" <<<"$existing"; then
      if docker secret rm "$secret"; then
        debug "Removed secret: $secret"
      else
        # Best effort, as before: creation below reports the hard failure.
        warning "Could not remove secret: $secret"
      fi
    else
      debug "Secret $secret did not exist"
    fi
  done

  # Create new secrets with validation
  log "Creating new Docker secrets with updated values"
  for secret in "${SECRET_NAMES[@]}"; do
    # printf instead of echo: values such as "-n" or "-e" are passed verbatim.
    # The trailing newline matches what the previous echo-based version stored.
    if printf '%s\n' "${!secret}" | docker secret create "$secret" -; then
      success "Created secret: $secret"
    else
      error "Failed to create secret: $secret"
      return 1
    fi
  done

  # Verify all secrets were created
  log "Verifying secret creation"
  if ! existing=$(docker secret ls --format '{{.Name}}'); then
    error "Unable to list Docker secrets"
    return 1
  fi
  for secret in "${SECRET_NAMES[@]}"; do
    if ! grep -Fxq -- "$secret" <<<"$existing"; then
      error "Secret verification failed: $secret was not created"
      return 1
    fi
  done

  success "All secrets created and verified"
}

# Single `docker stack deploy` attempt.
_stack_deploy_once() {
  docker stack deploy --with-registry-auth -c "$STACK_FILE" "$CI_REPO_NAME"
}

#######################################
# Deploy the stack, retrying transient failures. Marks the deployment as
# started so cleanup() reports rollback information on failure.
# Returns: 0 on success, 1 when all attempts failed
#######################################
deploy_stack() {
  log "Deploying new stack with fresh secrets"

  ROLLBACK_NEEDED=true
  # shellcheck disable=SC2034
  DEPLOYMENT_STARTED=true

  if ! retry_command "_stack_deploy_once" "Stack deployment"; then
    error "Stack deployment failed"
    return 1
  fi

  success "Stack deployment command completed"
}

# Print the CURRENT state (e.g. "Running 10 seconds ago") of every task that
# belongs to the Authelia service, one per line, in `docker stack ps` order.
# Reading the CurrentState field avoids matching the DESIRED STATE column,
# which says "Running" even for tasks that are still Pending/Preparing.
_authelia_task_states() {
  docker stack ps "$CI_REPO_NAME" --format '{{.Name}}|{{.CurrentState}}' 2>/dev/null \
    | awk -F'|' -v svc="${CI_REPO_NAME}_${SERVICE_NAME}." 'index($1, svc) > 0 { print $2 }'
}

#######################################
# Poll the stack until the Authelia service has a running task and no failed
# tasks, or HEALTH_CHECK_TIMEOUT seconds have elapsed.
# Globals: ROLLBACK_NEEDED (cleared on success)
# Returns: 0 when healthy, 1 on timeout (after printing debugging details)
#######################################
comprehensive_health_check() {
  log "Starting comprehensive health check (${HEALTH_CHECK_TIMEOUT}s timeout)"

  local start_time
  start_time=$(date +%s)
  local timeout=$HEALTH_CHECK_TIMEOUT

  # Wait for services to initialize
  log "Waiting for services to initialize (30 seconds)"
  sleep 30

  # Check deployment status (informational only; must not abort under errexit)
  log "Checking deployment status"
  docker stack ps "$CI_REPO_NAME" || warning "Unable to list stack tasks"

  local check_count=0
  local max_checks=$(( timeout / 5 ))
  local elapsed=0
  local states newest_state failed_count

  while (( check_count < max_checks )); do
    elapsed=$(( $(date +%s) - start_time ))

    log "Health check attempt $((check_count + 1))/${max_checks} (${elapsed}s elapsed)"

    # Check if authelia service is running
    states=$(_authelia_task_states) || states=""
    if grep -q '^Running' <<<"$states"; then
      success "✅ Authelia service is running!"

      # Additional verification checks
      log "Performing additional health verification..."
      sleep 5

      # Check service is stable (not restarting): the first listed task
      # must still be running after the pause.
      states=$(_authelia_task_states) || states=""
      newest_state=${states%%$'\n'*}
      if [[ "$newest_state" == Running* ]]; then
        # grep -c prints the count itself (including 0) but exits 1 on zero
        # matches; `|| true` keeps the single number without appending to it.
        failed_count=$(grep -c '^Failed' <<<"$states" || true)

        if (( failed_count == 0 )); then
          elapsed=$(( $(date +%s) - start_time ))
          success "🎉 Production deployment completed successfully!"
          success "Authelia service is healthy and stable"
          success "Total deployment time: ${elapsed} seconds"
          ROLLBACK_NEEDED=false
          return 0
        else
          warning "Found $failed_count failed service instances, continuing health checks..."
        fi
      fi
    fi

    if (( elapsed >= timeout )); then
      break
    fi

    log "Waiting for authelia service... (${elapsed}s/${timeout}s)"
    sleep 5
    # Not ((check_count++)): that returns status 1 when the old value is 0,
    # which would terminate the script under `set -e`.
    check_count=$(( check_count + 1 ))
  done

  # Health check failed
  error "❌ Health check failed after ${timeout} seconds"
  error "Deployment verification failed"

  # Show detailed debugging information
  error "=== DEBUGGING INFORMATION ==="
  error "Stack status:"
  docker stack ps "$CI_REPO_NAME" || true

  error "Authelia service logs (last 30 lines):"
  docker service logs "${CI_REPO_NAME}_${SERVICE_NAME}" --tail 30 || true

  error "Docker service inspect:"
  docker service inspect "${CI_REPO_NAME}_${SERVICE_NAME}" --pretty || true

  return 1
}

#######################################
# Main deployment sequence. Any failing step terminates the script (errexit),
# which triggers cleanup() through the EXIT trap.
#######################################
main() {
  # CI_REPO_NAME may be unset here; pre_flight_checks reports that properly,
  # so do not let `set -u` abort with a cryptic message first.
  log "🚀 Starting production deployment for ${CI_REPO_NAME:-(unset)}"

  # Pre-flight checks
  pre_flight_checks

  # Get current state for potential rollback
  get_current_image_id

  # Step 1: Docker registry login
  docker_registry_login

  # Step 2: Remove old stack to release secrets
  log "Removing old stack to release secrets"
  docker stack rm "$CI_REPO_NAME" \
    || warning "docker stack rm reported an error; continuing"

  # Step 3: Wait for complete stack removal with timeout
  log "Waiting for complete stack removal (30 seconds minimum)"
  sleep 30
  wait_for_stack_removal

  # Step 4 & 5: Manage secrets (remove old, create new)
  manage_secrets

  # Step 6: Deploy new stack
  deploy_stack

  # Step 7-9: Comprehensive health checking
  comprehensive_health_check

  success "🎉 Production deployment completed successfully!"
}

# Run main function
main "$@"