Compare commits
	
		
			No commits in common. "2cb6159eea0105f8b93231aeeef7cae3d5b1685b" and "bf09520c1db4edca60279e49d30b7bdc89944686" have entirely different histories.
		
	
	
		
			2cb6159eea...bf09520c1d
		
	
		|  | @ -205,7 +205,7 @@ steps: | ||||||
|     volumes: |     volumes: | ||||||
|       - /var/run/docker.sock:/var/run/docker.sock |       - /var/run/docker.sock:/var/run/docker.sock | ||||||
|     commands: |     commands: | ||||||
|       - bash ./scripts/ci-deploy-production.sh |       - ./scripts/ci-deploy-production.sh | ||||||
|     when: |     when: | ||||||
|       branch: main |       branch: main | ||||||
|       event: [push, cron] |       event: [push, cron] | ||||||
|  |  | ||||||
|  | @ -29,11 +29,10 @@ set -euo pipefail | ||||||
| readonly SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" | readonly SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" | ||||||
| readonly LOCK_FILE="/tmp/authelia-deploy.lock" | readonly LOCK_FILE="/tmp/authelia-deploy.lock" | ||||||
| readonly MAX_RETRIES=3 | readonly MAX_RETRIES=3 | ||||||
| readonly RETRY_DELAY=5  # Reduced from 10s to 5s | readonly RETRY_DELAY=10 | ||||||
| readonly DEPLOYMENT_TIMEOUT=180  # Reduced from 300s to 180s (3 minutes) | readonly DEPLOYMENT_TIMEOUT=300  # 5 minutes | ||||||
| readonly HEALTH_CHECK_TIMEOUT=90  # Reduced from 120s to 90s | readonly HEALTH_CHECK_TIMEOUT=120  # 2 minutes | ||||||
| readonly MIN_DISK_SPACE_MB=500  # Reduced from 1000MB to 500MB | readonly MIN_DISK_SPACE_MB=1000 | ||||||
| readonly FORCE_PULL=true  # Always pull latest images |  | ||||||
| 
 | 
 | ||||||
| # Color codes for output | # Color codes for output | ||||||
| readonly RED='\033[0;31m' | readonly RED='\033[0;31m' | ||||||
|  | @ -41,13 +40,11 @@ readonly GREEN='\033[0;32m' | ||||||
| readonly YELLOW='\033[1;33m' | readonly YELLOW='\033[1;33m' | ||||||
| readonly BLUE='\033[0;34m' | readonly BLUE='\033[0;34m' | ||||||
| readonly PURPLE='\033[0;35m' | readonly PURPLE='\033[0;35m' | ||||||
| readonly CYAN='\033[0;36m' |  | ||||||
| readonly NC='\033[0m' # No Color | readonly NC='\033[0m' # No Color | ||||||
| 
 | 
 | ||||||
| # Global variables for cleanup | # Global variables for cleanup | ||||||
| DEPLOYMENT_STARTED=false | DEPLOYMENT_STARTED=false | ||||||
| OLD_IMAGE_HASH="" | OLD_IMAGE_ID="" | ||||||
| NEW_IMAGE_HASH="" |  | ||||||
| ROLLBACK_NEEDED=false | ROLLBACK_NEEDED=false | ||||||
| 
 | 
 | ||||||
| # Logging functions | # Logging functions | ||||||
|  | @ -182,23 +179,23 @@ pre_flight_checks() { | ||||||
| # Get current image ID for rollback purposes | # Get current image ID for rollback purposes | ||||||
| get_current_image_id() { | get_current_image_id() { | ||||||
|     if docker stack ps "${CI_REPO_NAME}" >/dev/null 2>&1; then |     if docker stack ps "${CI_REPO_NAME}" >/dev/null 2>&1; then | ||||||
|         OLD_IMAGE_HASH=$(docker stack ps "${CI_REPO_NAME}" --format "table {{.Image}}" | grep authelia | head -n1 || echo "") |         OLD_IMAGE_ID=$(docker stack ps "${CI_REPO_NAME}" --format "table {{.Image}}" | grep authelia | head -n1 || echo "") | ||||||
|         if [[ -n "$OLD_IMAGE_HASH" ]]; then |         if [[ -n "$OLD_IMAGE_ID" ]]; then | ||||||
|             debug "Current image for rollback: $OLD_IMAGE_HASH" |             debug "Current image for rollback: $OLD_IMAGE_ID" | ||||||
|         fi |         fi | ||||||
|     fi |     fi | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| # Rollback function | # Rollback function | ||||||
| attempt_rollback() { | attempt_rollback() { | ||||||
|     if [[ -n "$OLD_IMAGE_HASH" && "$OLD_IMAGE_HASH" != "IMAGE" ]]; then |     if [[ -n "$OLD_IMAGE_ID" && "$OLD_IMAGE_ID" != "IMAGE" ]]; then | ||||||
|         warning "Attempting rollback to previous image: $OLD_IMAGE_HASH" |         warning "Attempting rollback to previous image: $OLD_IMAGE_ID" | ||||||
|          |          | ||||||
|         # This would require a more complex rollback mechanism |         # This would require a more complex rollback mechanism | ||||||
|         # For now, just log the attempt |         # For now, just log the attempt | ||||||
|         error "Rollback mechanism not yet implemented" |         error "Rollback mechanism not yet implemented" | ||||||
|         error "Manual intervention required" |         error "Manual intervention required" | ||||||
|         error "Previous image was: $OLD_IMAGE_HASH" |         error "Previous image was: $OLD_IMAGE_ID" | ||||||
|     else |     else | ||||||
|         error "No previous image information available for rollback" |         error "No previous image information available for rollback" | ||||||
|     fi |     fi | ||||||
|  | @ -212,107 +209,10 @@ docker_registry_login() { | ||||||
|     retry_command "$login_cmd" "Docker registry login" |     retry_command "$login_cmd" "Docker registry login" | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| # Force pull latest images to ensure we deploy the newest version | # Wait for stack removal with timeout | ||||||
| force_pull_latest_images() { |  | ||||||
|     log "🚀 Force pulling latest images to ensure fresh deployment" |  | ||||||
|      |  | ||||||
|     # Get the image names from docker-compose production file |  | ||||||
|     local authelia_image="git.nixc.us/nixius/authelia:production-authelia" |  | ||||||
|     local mariadb_image="git.nixc.us/nixius/authelia:production-mariadb"  |  | ||||||
|     local redis_image="git.nixc.us/nixius/authelia:production-redis" |  | ||||||
|      |  | ||||||
|     # Pull each image and capture new hashes |  | ||||||
|     log "Pulling Authelia image..." |  | ||||||
|     if docker pull "$authelia_image"; then |  | ||||||
|         NEW_IMAGE_HASH=$(docker images --format "table {{.Repository}}:{{.Tag}}\t{{.ID}}" | grep "production-authelia" | awk '{print $2}' | head -n1) |  | ||||||
|         success "✅ Authelia image pulled: $NEW_IMAGE_HASH" |  | ||||||
|     else |  | ||||||
|         error "❌ Failed to pull Authelia image" |  | ||||||
|         return 1 |  | ||||||
|     fi |  | ||||||
|      |  | ||||||
|     log "Pulling MariaDB image..." |  | ||||||
|     retry_command "docker pull $mariadb_image" "MariaDB image pull" |  | ||||||
|      |  | ||||||
|     log "Pulling Redis image..." |  | ||||||
|     retry_command "docker pull $redis_image" "Redis image pull" |  | ||||||
|      |  | ||||||
|     # Verify we have a new image hash |  | ||||||
|     if [[ -n "$NEW_IMAGE_HASH" && "$NEW_IMAGE_HASH" != "$OLD_IMAGE_HASH" ]]; then |  | ||||||
|         success "🔄 New image detected: $OLD_IMAGE_HASH → $NEW_IMAGE_HASH" |  | ||||||
|     elif [[ -n "$NEW_IMAGE_HASH" ]]; then |  | ||||||
|         warning "⚠️ Same image hash detected: $NEW_IMAGE_HASH (this may be expected)" |  | ||||||
|     else |  | ||||||
|         error "❌ Could not determine new image hash" |  | ||||||
|         return 1 |  | ||||||
|     fi |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| # Get detailed container information for debugging |  | ||||||
| get_container_diagnostics() { |  | ||||||
|     local service_name="$1" |  | ||||||
|     local container_logs="" |  | ||||||
|      |  | ||||||
|     error "=== 🔍 DETAILED DIAGNOSTICS FOR ${service_name} ===" |  | ||||||
|      |  | ||||||
|     # Get all tasks for this service |  | ||||||
|     local tasks |  | ||||||
|     tasks=$(docker service ps "${CI_REPO_NAME}_${service_name}" --format "{{.ID}}\t{{.Name}}\t{{.CurrentState}}\t{{.Error}}" --no-trunc) |  | ||||||
|      |  | ||||||
|     if [[ -n "$tasks" ]]; then |  | ||||||
|         error "Service tasks:" |  | ||||||
|         echo "$tasks" | while IFS=$'\t' read -r task_id name state task_error; do |  | ||||||
|             error "  Task: $name" |  | ||||||
|             error "    ID: $task_id" |  | ||||||
|             error "    State: $state" |  | ||||||
|             if [[ -n "$task_error" ]]; then |  | ||||||
|                 error "    Error: $task_error" |  | ||||||
|             fi |  | ||||||
|              |  | ||||||
|             # Try to get container logs for this task |  | ||||||
|             log "Attempting to get logs for task $task_id..." |  | ||||||
|             local task_logs |  | ||||||
|             task_logs=$(docker service logs "${CI_REPO_NAME}_${service_name}" --raw --tail 20 2>/dev/null || echo "No logs available") |  | ||||||
|             if [[ "$task_logs" != "No logs available" ]]; then |  | ||||||
|                 error "    Recent logs:" |  | ||||||
|                 echo "$task_logs" | sed 's/^/      /' |  | ||||||
|             fi |  | ||||||
|         done |  | ||||||
|     else |  | ||||||
|         error "No service tasks found for ${service_name}" |  | ||||||
|     fi |  | ||||||
|      |  | ||||||
|     # Get service inspection details |  | ||||||
|     error "Service inspection:" |  | ||||||
|     docker service inspect "${CI_REPO_NAME}_${service_name}" --pretty 2>/dev/null | head -20 | sed 's/^/  /' || error "  Service inspect failed" |  | ||||||
|      |  | ||||||
|     # Check if there are any containers running for this service |  | ||||||
|     local containers |  | ||||||
|     containers=$(docker ps -a --filter "label=com.docker.swarm.service.name=${CI_REPO_NAME}_${service_name}" --format "{{.ID}}\t{{.Status}}\t{{.Names}}" 2>/dev/null || echo "") |  | ||||||
|      |  | ||||||
|     if [[ -n "$containers" ]]; then |  | ||||||
|         error "Associated containers:" |  | ||||||
|         echo "$containers" | while IFS=$'\t' read -r container_id status name; do |  | ||||||
|             error "  Container: $name ($container_id)" |  | ||||||
|             error "    Status: $status" |  | ||||||
|              |  | ||||||
|             # Get container logs |  | ||||||
|             local container_logs |  | ||||||
|             container_logs=$(docker logs "$container_id" --tail 15 2>&1 || echo "No container logs available") |  | ||||||
|             error "    Container logs (last 15 lines):" |  | ||||||
|             echo "$container_logs" | sed 's/^/      /' |  | ||||||
|         done |  | ||||||
|     else |  | ||||||
|         error "No containers found for service ${service_name}" |  | ||||||
|     fi |  | ||||||
|      |  | ||||||
|     error "=== END DIAGNOSTICS FOR ${service_name} ===" |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| # Optimized wait for stack removal |  | ||||||
| wait_for_stack_removal() { | wait_for_stack_removal() { | ||||||
|     log "Verifying stack removal completed" |     log "Verifying stack removal completed" | ||||||
|     local timeout=60  # Reduced timeout for faster deployment |     local timeout=$((DEPLOYMENT_TIMEOUT)) | ||||||
|     local elapsed=0 |     local elapsed=0 | ||||||
|      |      | ||||||
|     while docker stack ls | grep -q "${CI_REPO_NAME}"; do  |     while docker stack ls | grep -q "${CI_REPO_NAME}"; do  | ||||||
|  | @ -321,11 +221,9 @@ wait_for_stack_removal() { | ||||||
|             return 1 |             return 1 | ||||||
|         fi |         fi | ||||||
|          |          | ||||||
|         if [[ $((elapsed % 10)) -eq 0 ]]; then  # Log every 10 seconds instead of 5 |         log "Stack still exists, waiting... (${elapsed}s/${timeout}s)" | ||||||
|             log "Stack still exists, waiting... (${elapsed}s/${timeout}s)" |         sleep 5 | ||||||
|         fi |         elapsed=$((elapsed + 5)) | ||||||
|         sleep 2  # Check every 2 seconds instead of 5 |  | ||||||
|         elapsed=$((elapsed + 2)) |  | ||||||
|     done |     done | ||||||
|      |      | ||||||
|     success "Stack removal completed in ${elapsed} seconds" |     success "Stack removal completed in ${elapsed} seconds" | ||||||
|  | @ -404,80 +302,56 @@ deploy_stack() { | ||||||
|     success "Stack deployment command completed" |     success "Stack deployment command completed" | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| # Enhanced health checking optimized for speed and accuracy | # Enhanced health checking with multiple validation methods | ||||||
| comprehensive_health_check() { | comprehensive_health_check() { | ||||||
|     log "🔍 Starting rapid health verification (${HEALTH_CHECK_TIMEOUT}s timeout)" |     log "Starting comprehensive health check (${HEALTH_CHECK_TIMEOUT}s timeout)" | ||||||
|     local start_time=$(date +%s) |     local start_time=$(date +%s) | ||||||
|     local timeout=$HEALTH_CHECK_TIMEOUT |     local timeout=$HEALTH_CHECK_TIMEOUT | ||||||
|      |      | ||||||
|     # Minimal initial wait - just 10 seconds instead of 30 |     # Wait for services to initialize | ||||||
|     log "Brief initialization wait (10 seconds)..." |     log "Waiting for services to initialize (30 seconds)" | ||||||
|     sleep 10 |     sleep 30 | ||||||
|      |      | ||||||
|     # Get immediate deployment status |     # Check deployment status | ||||||
|     log "Checking deployment status" |     log "Checking deployment status" | ||||||
|     docker stack ps "${CI_REPO_NAME}" |     docker stack ps "${CI_REPO_NAME}" | ||||||
|      |      | ||||||
|     # Fast health check loop with 2-second intervals |     # Health check loop with multiple validation methods | ||||||
|     local check_count=0 |     local check_count=0 | ||||||
|     local max_checks=$((timeout / 2))  # Check every 2 seconds |     local max_checks=$((timeout / 5)) | ||||||
|     local authelia_healthy=false |  | ||||||
|     local last_status="" |  | ||||||
|      |      | ||||||
|     while [[ $check_count -lt $max_checks ]]; do |     while [[ $check_count -lt $max_checks ]]; do | ||||||
|         local current_time=$(date +%s) |         local current_time=$(date +%s) | ||||||
|         local elapsed=$((current_time - start_time)) |         local elapsed=$((current_time - start_time)) | ||||||
|          |          | ||||||
|         # Only log every 10 seconds to reduce noise |         log "Health check attempt $((check_count + 1))/${max_checks} (${elapsed}s elapsed)" | ||||||
|         if [[ $((check_count % 5)) -eq 0 ]]; then |  | ||||||
|             log "Health check ${check_count}/${max_checks} (${elapsed}s elapsed)" |  | ||||||
|         fi |  | ||||||
|          |          | ||||||
|         # Get current service status |         # Check if authelia service is running | ||||||
|         local service_status |         if docker stack ps "${CI_REPO_NAME}" | grep -q "authelia_authelia.*Running"; then | ||||||
|         service_status=$(docker stack ps "${CI_REPO_NAME}" --format "{{.Name}}\t{{.CurrentState}}\t{{.Error}}" | grep "authelia_authelia" | head -n1) |             success "✅ Authelia service is running!" | ||||||
|          |  | ||||||
|         if [[ -n "$service_status" ]]; then |  | ||||||
|             local name=$(echo "$service_status" | cut -f1) |  | ||||||
|             local state=$(echo "$service_status" | cut -f2) |  | ||||||
|             local error_msg=$(echo "$service_status" | cut -f3) |  | ||||||
|              |              | ||||||
|             # Check for Running state |             # Additional verification checks | ||||||
|             if echo "$state" | grep -q "Running"; then |             log "Performing additional health verification..." | ||||||
|                 # Verify it's actually stable by checking for a few seconds |             sleep 5 | ||||||
|                 if [[ "$last_status" == "Running" ]]; then |              | ||||||
|                     # Double-check: no recent failures |             # Check service is stable (not restarting) | ||||||
|                     local failed_count |             local service_info | ||||||
|                     failed_count=$(docker stack ps "${CI_REPO_NAME}" | grep "authelia_authelia" | grep -c "Failed" || echo "0") |             service_info=$(docker stack ps "${CI_REPO_NAME}" | grep "authelia_authelia" | head -n1) | ||||||
|                      |              | ||||||
|                     if [[ $failed_count -eq 0 ]]; then |             if echo "$service_info" | grep -q "Running"; then | ||||||
|                         # Final verification: ensure we're using the new image |                 # Check if there are any failed instances | ||||||
|                         local current_image |                 local failed_count | ||||||
|                         current_image=$(docker stack ps "${CI_REPO_NAME}" --format "{{.Image}}" | grep authelia | head -n1) |                 failed_count=$(docker stack ps "${CI_REPO_NAME}" | grep "authelia_authelia" | grep -c "Failed" || echo "0") | ||||||
|                          |                  | ||||||
|                         if [[ "$current_image" == *"$NEW_IMAGE_HASH"* ]] || [[ -z "$NEW_IMAGE_HASH" ]]; then |                 if [[ $failed_count -eq 0 ]]; then | ||||||
|                             success "✅ Authelia service is healthy and running!" |                     success "🎉 Production deployment completed successfully!" | ||||||
|                             success "🎯 Using correct image: $current_image" |                     success "Authelia service is healthy and stable" | ||||||
|                             success "⚡ Total deployment time: ${elapsed} seconds" |                     success "Total deployment time: ${elapsed} seconds" | ||||||
|                             ROLLBACK_NEEDED=false |                     ROLLBACK_NEEDED=false | ||||||
|                             return 0 |                     return 0 | ||||||
|                         else |                 else | ||||||
|                             warning "⚠️ Service running but using wrong image: $current_image (expected: $NEW_IMAGE_HASH)" |                     warning "Found $failed_count failed service instances, continuing health checks..." | ||||||
|                         fi |  | ||||||
|                     else |  | ||||||
|                         warning "⚠️ Service running but found $failed_count failed instances" |  | ||||||
|                     fi |  | ||||||
|                 fi |                 fi | ||||||
|                 last_status="Running" |  | ||||||
|             elif echo "$state" | grep -q "Failed\|Rejected\|Shutdown"; then |  | ||||||
|                 error "❌ Service failed: $state" |  | ||||||
|                 if [[ -n "$error_msg" ]]; then |  | ||||||
|                     error "Error: $error_msg" |  | ||||||
|                 fi |  | ||||||
|                 break  # Exit early on clear failure |  | ||||||
|             else |  | ||||||
|                 last_status="$state" |  | ||||||
|                 debug "Service state: $state" |  | ||||||
|             fi |             fi | ||||||
|         fi |         fi | ||||||
|          |          | ||||||
|  | @ -485,36 +359,25 @@ comprehensive_health_check() { | ||||||
|             break |             break | ||||||
|         fi |         fi | ||||||
|          |          | ||||||
|         sleep 2 |         log "Waiting for authelia service... (${elapsed}s/${timeout}s)" | ||||||
|  |         sleep 5 | ||||||
|         ((check_count++)) |         ((check_count++)) | ||||||
|     done |     done | ||||||
|      |      | ||||||
|     # Health check failed - provide comprehensive diagnostics |     # Health check failed | ||||||
|     error "❌ Health check failed after ${elapsed} seconds" |     error "❌ Health check failed after ${timeout} seconds" | ||||||
|     error "Deployment verification failed" |     error "Deployment verification failed" | ||||||
|      |      | ||||||
|     # Get detailed diagnostics for each service |     # Show detailed debugging information | ||||||
|     log "🔍 Gathering comprehensive diagnostics..." |     error "=== DEBUGGING INFORMATION ===" | ||||||
|  |     error "Stack status:" | ||||||
|  |     docker stack ps "${CI_REPO_NAME}" || true | ||||||
|      |      | ||||||
|     local services=("authelia" "mariadb" "redis") |     error "Authelia service logs (last 30 lines):" | ||||||
|     for service in "${services[@]}"; do |     docker service logs "${CI_REPO_NAME}_authelia" --tail 30 || true | ||||||
|         if docker service ls --format "{{.Name}}" | grep -q "${CI_REPO_NAME}_${service}"; then |  | ||||||
|             get_container_diagnostics "$service" |  | ||||||
|         else |  | ||||||
|             error "Service ${CI_REPO_NAME}_${service} not found!" |  | ||||||
|         fi |  | ||||||
|     done |  | ||||||
|      |      | ||||||
|     # Additional stack-level diagnostics |     error "Docker service inspect:" | ||||||
|     error "=== 📊 STACK-LEVEL DIAGNOSTICS ===" |     docker service inspect "${CI_REPO_NAME}_authelia" --pretty || true | ||||||
|     error "Full stack status:" |  | ||||||
|     docker stack ps "${CI_REPO_NAME}" --no-trunc || true |  | ||||||
|      |  | ||||||
|     error "Stack services:" |  | ||||||
|     docker stack services "${CI_REPO_NAME}" || true |  | ||||||
|      |  | ||||||
|     error "Recent Docker events:" |  | ||||||
|     docker events --since="$((elapsed + 60))s" --until="now" --filter "container" 2>/dev/null | tail -10 || true |  | ||||||
|      |      | ||||||
|     return 1 |     return 1 | ||||||
| } | } | ||||||
|  | @ -532,16 +395,13 @@ main() { | ||||||
|     # Step 1: Docker registry login |     # Step 1: Docker registry login | ||||||
|     docker_registry_login |     docker_registry_login | ||||||
|      |      | ||||||
|     # Step 1.5: Force pull latest images to ensure fresh deployment |  | ||||||
|     force_pull_latest_images |  | ||||||
|      |  | ||||||
|     # Step 2: Remove old stack to release secrets |     # Step 2: Remove old stack to release secrets | ||||||
|     log "Removing old stack to release secrets" |     log "Removing old stack to release secrets" | ||||||
|     docker stack rm "${CI_REPO_NAME}" || true |     docker stack rm "${CI_REPO_NAME}" || true | ||||||
|      |      | ||||||
|     # Step 3: Wait for complete stack removal with optimized timeout |     # Step 3: Wait for complete stack removal with timeout | ||||||
|     log "Waiting for complete stack removal (minimum 15 seconds)" |     log "Waiting for complete stack removal (30 seconds minimum)" | ||||||
|     sleep 15  # Reduced from 30 seconds |     sleep 30 | ||||||
|     wait_for_stack_removal |     wait_for_stack_removal | ||||||
|      |      | ||||||
|     # Step 4 & 5: Manage secrets (remove old, create new) |     # Step 4 & 5: Manage secrets (remove old, create new) | ||||||
|  | @ -550,11 +410,10 @@ main() { | ||||||
|     # Step 6: Deploy new stack |     # Step 6: Deploy new stack | ||||||
|     deploy_stack |     deploy_stack | ||||||
|      |      | ||||||
|     # Step 7-9: Rapid health checking with container diagnostics |     # Step 7-9: Comprehensive health checking | ||||||
|     comprehensive_health_check |     comprehensive_health_check | ||||||
|      |      | ||||||
|     success "🎉 Production deployment completed successfully!" |     success "🎉 Production deployment completed successfully!" | ||||||
|     success "🏆 Deployed image: $NEW_IMAGE_HASH" |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| # Run main function | # Run main function | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue