diff --git a/scripts/ci-deploy-production.sh b/scripts/ci-deploy-production.sh index 8f8690d..27d9c71 100755 --- a/scripts/ci-deploy-production.sh +++ b/scripts/ci-deploy-production.sh @@ -27,14 +27,12 @@ set -euo pipefail # Configuration SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" -LOCK_FILE="/tmp/authelia-deploy.lock" MAX_RETRIES=3 -RETRY_DELAY=5 # Reduced from 10s to 5s +RETRY_DELAY=5 +FORCE_PULL=true # Always pull latest images DEPLOYMENT_TIMEOUT=180 # Reduced from 300s to 180s (3 minutes) HEALTH_CHECK_TIMEOUT=90 # Reduced from 120s to 90s -FORCE_PULL=true # Always pull latest images - # Color codes for output RED='\033[0;31m' GREEN='\033[0;32m' @@ -74,27 +72,7 @@ debug() { # Cleanup function - runs on script exit cleanup() { local exit_code=$? - - if [ -f "$LOCK_FILE" ]; then - debug "Removing deployment lock file" - rm -f "$LOCK_FILE" - fi - - if [ $exit_code -ne 0 ]; then - error "Deployment failed with exit code: $exit_code" - log "📊 Providing final deployment status for debugging..." - - # Show final stack status for debugging - if docker stack ls | grep -q "${CI_REPO_NAME}"; then - error "=== FINAL STACK STATUS ===" - docker stack ps "${CI_REPO_NAME}" --no-trunc || true - docker stack services "${CI_REPO_NAME}" || true - else - warning "Stack ${CI_REPO_NAME} no longer exists" - fi - fi - - debug "Cleanup completed with exit code: $exit_code" + debug "Script completed with exit code: $exit_code" exit $exit_code } @@ -131,17 +109,6 @@ retry_command() { pre_flight_checks() { log "Running pre-flight checks..." - # Check if another deployment is running - if [ -f "$LOCK_FILE" ]; then - error "Another deployment is already running (lock file exists: $LOCK_FILE)" - error "If you're sure no other deployment is running, remove the lock file manually" - exit 1 - fi - - # Create lock file - echo "$$" > "$LOCK_FILE" - debug "Created deployment lock file" - # Verify we're running in CI environment if [ -z "${CI_REPO_NAME:-}" ]; then error "This script must only be run in Woodpecker CI environment!" @@ -155,8 +122,6 @@ pre_flight_checks() { exit 1 fi - - # Verify required environment variables REQUIRED_VARS="REGISTRY_USER REGISTRY_PASSWORD CI_REPO_NAME AUTHENTICATION_BACKEND_LDAP_PASSWORD IDENTITY_VALIDATION_RESET_PASSWORD_JWT_SECRET STORAGE_ENCRYPTION_KEY SESSION_SECRET NOTIFIER_SMTP_PASSWORD IDENTITY_PROVIDERS_OIDC_HMAC_SECRET IDENTITY_PROVIDERS_OIDC_ISSUER_PRIVATE_KEY IDENTITY_PROVIDERS_OIDC_JWKS_KEY CLIENT_SECRET_HEADSCALE CLIENT_SECRET_HEADADMIN" @@ -392,100 +357,46 @@ deploy_stack() { success "Stack deployment command completed" } -# Enhanced health checking focused on image verification and debugging +# Simple deployment verification - just deploy and get logs if it fails comprehensive_health_check() { - log "🔍 Starting deployment verification (${HEALTH_CHECK_TIMEOUT}s timeout)" - local start_time=$(date +%s) - local timeout=$HEALTH_CHECK_TIMEOUT + log "🔍 Waiting for services to start..." - # Database initialization wait - giving MariaDB time to start - log "Database initialization wait (45 seconds)..." - sleep 45 + # Wait for database initialization + log "Waiting 60 seconds for database initialization..." + sleep 60 - # Get immediate deployment status - log "Checking deployment status" + # Check deployment status + log "Final deployment status:" docker stack ps "${CI_REPO_NAME}" - # Image verification loop - local check_count=0 - local max_checks=$((timeout / 10)) # Check every 10 seconds + # Get logs for any failed services + log "Checking for failures and getting logs..." - while [ $check_count -lt $max_checks ]; do - local current_time=$(date +%s) - local elapsed=$((current_time - start_time)) - - log "Verification check ${check_count}/${max_checks} (${elapsed}s elapsed)" - - # Get current service status - local service_status - service_status=$(docker stack ps "${CI_REPO_NAME}" --format "{{.Name}}\t{{.CurrentState}}\t{{.Error}}" | grep "authelia_authelia" | head -n1) - - if [ -n "$service_status" ]; then - local name=$(echo "$service_status" | cut -f1) - local state=$(echo "$service_status" | cut -f2) - local error_msg=$(echo "$service_status" | cut -f3) - - log "Current Authelia state: $state" - - # Check for Running state - if echo "$state" | grep -q "Running"; then - # Verify image hash - local current_image - current_image=$(docker stack ps "${CI_REPO_NAME}" --format "{{.Image}}" | grep authelia | head -n1) - - log "🎯 Current image: $current_image" - log "🎯 Expected image hash: $NEW_IMAGE_HASH" - - if echo "$current_image" | grep -q "$NEW_IMAGE_HASH" || [ -z "$NEW_IMAGE_HASH" ]; then - success "✅ Authelia service is healthy and running with correct image!" - success "🎯 Using image: $current_image" - success "⚡ Total deployment time: ${elapsed} seconds" - return 0 - else - warning "⚠️ Service running but using different image than expected" - warning "Current: $current_image" - warning "Expected hash: $NEW_IMAGE_HASH" - warning "This may be normal if the image hasn't changed" - fi - elif echo "$state" | grep -q "Failed\|Rejected\|Shutdown"; then - warning "❌ Service failed: $state" - if [ -n "$error_msg" ]; then - error "Error: $error_msg" - fi - # Get recent logs for debugging - log "📋 Getting recent logs for debugging..." - docker service logs "${CI_REPO_NAME}_authelia" --tail 20 2>/dev/null || echo "No logs available" - else - debug "Service state: $state (still starting up)" - fi - fi - - if [ $elapsed -ge $timeout ]; then - warning "⏰ Reached timeout after ${elapsed} seconds" - log "📊 Final status for debugging:" - docker stack ps "${CI_REPO_NAME}" --no-trunc || true - break - fi - - sleep 10 - check_count=$((check_count + 1)) - done + # Check Authelia + if docker stack ps "${CI_REPO_NAME}" | grep "authelia_authelia" | grep -q "Failed"; then + error "❌ Authelia service failed - getting logs:" + docker service logs "${CI_REPO_NAME}_authelia" --tail 30 2>/dev/null || echo "No logs available" + elif docker stack ps "${CI_REPO_NAME}" | grep "authelia_authelia" | grep -q "Running"; then + success "✅ Authelia service is running" + else + warning "⚠️ Authelia service status unclear - getting logs:" + docker service logs "${CI_REPO_NAME}_authelia" --tail 20 2>/dev/null || echo "No logs available" + fi - # Deployment verification completed - warning "📊 Deployment verification completed - check logs above for status" + # Check MariaDB + if docker stack ps "${CI_REPO_NAME}" | grep "authelia_mariadb" | grep -q "Failed"; then + error "❌ MariaDB service failed - getting logs:" + docker service logs "${CI_REPO_NAME}_mariadb" --tail 20 2>/dev/null || echo "No logs available" + elif docker stack ps "${CI_REPO_NAME}" | grep "authelia_mariadb" | grep -q "Running"; then + success "✅ MariaDB service is running" + fi - # Get final diagnostic info - log "🔍 Final diagnostics..." - local services="authelia mariadb redis" - for service in $services; do - if docker service ls --format "{{.Name}}" | grep -q "${CI_REPO_NAME}_${service}"; then - log "=== ${service} STATUS ===" - docker service logs "${CI_REPO_NAME}_${service}" --tail 10 2>/dev/null || echo "No logs available" - fi - done + # Check Redis + if docker stack ps "${CI_REPO_NAME}" | grep "authelia_redis" | grep -q "Running"; then + success "✅ Redis service is running" + fi - # Don't fail - let it run for debugging - warning "Deployment may still be starting - leaving stack running for debugging" + log "Deployment completed - check logs above for any issues" return 0 }