fixing deploy step
ci/woodpecker/push/woodpecker Pipeline failed Details

This commit is contained in:
Your Name 2025-06-05 11:58:43 -04:00
parent 40fe535bdd
commit ff3d077282
1 changed files with 54 additions and 64 deletions

View File

@ -80,9 +80,18 @@ cleanup() {
rm -f "$LOCK_FILE" rm -f "$LOCK_FILE"
fi fi
if [ $exit_code -ne 0 ] && [ "$ROLLBACK_NEEDED" = "true" ]; then if [ $exit_code -ne 0 ]; then
error "Deployment failed - attempting rollback..." error "Deployment failed with exit code: $exit_code"
attempt_rollback log "📊 Providing final deployment status for debugging..."
# Show final stack status for debugging
if docker stack ls | grep -q "${CI_REPO_NAME}"; then
error "=== FINAL STACK STATUS ==="
docker stack ps "${CI_REPO_NAME}" --no-trunc || true
docker stack services "${CI_REPO_NAME}" || true
else
warning "Stack ${CI_REPO_NAME} no longer exists"
fi
fi fi
debug "Cleanup completed with exit code: $exit_code" debug "Cleanup completed with exit code: $exit_code"
@ -371,7 +380,6 @@ manage_secrets() {
# Enhanced deployment with better error handling # Enhanced deployment with better error handling
deploy_stack() { deploy_stack() {
log "Deploying new stack with fresh secrets" log "Deploying new stack with fresh secrets"
ROLLBACK_NEEDED=true
DEPLOYMENT_STARTED=true DEPLOYMENT_STARTED=true
local deploy_cmd="docker stack deploy --with-registry-auth -c ./stack.production.yml '${CI_REPO_NAME}'" local deploy_cmd="docker stack deploy --with-registry-auth -c ./stack.production.yml '${CI_REPO_NAME}'"
@ -384,9 +392,9 @@ deploy_stack() {
success "Stack deployment command completed" success "Stack deployment command completed"
} }
# Enhanced health checking optimized for speed and accuracy # Enhanced health checking focused on image verification and debugging
comprehensive_health_check() { comprehensive_health_check() {
log "🔍 Starting rapid health verification (${HEALTH_CHECK_TIMEOUT}s timeout)" log "🔍 Starting deployment verification (${HEALTH_CHECK_TIMEOUT}s timeout)"
local start_time=$(date +%s) local start_time=$(date +%s)
local timeout=$HEALTH_CHECK_TIMEOUT local timeout=$HEALTH_CHECK_TIMEOUT
@ -398,20 +406,15 @@ comprehensive_health_check() {
log "Checking deployment status" log "Checking deployment status"
docker stack ps "${CI_REPO_NAME}" docker stack ps "${CI_REPO_NAME}"
# Fast health check loop with 2-second intervals # Image verification loop
local check_count=0 local check_count=0
local max_checks=$((timeout / 2)) # Check every 2 seconds local max_checks=$((timeout / 10)) # Check every 10 seconds
local authelia_healthy=false
local last_status=""
while [ $check_count -lt $max_checks ]; do while [ $check_count -lt $max_checks ]; do
local current_time=$(date +%s) local current_time=$(date +%s)
local elapsed=$((current_time - start_time)) local elapsed=$((current_time - start_time))
# Only log every 10 seconds to reduce noise log "Verification check ${check_count}/${max_checks} (${elapsed}s elapsed)"
if [ $((check_count % 5)) -eq 0 ]; then
log "Health check ${check_count}/${max_checks} (${elapsed}s elapsed)"
fi
# Get current service status # Get current service status
local service_status local service_status
@ -422,81 +425,68 @@ comprehensive_health_check() {
local state=$(echo "$service_status" | cut -f2) local state=$(echo "$service_status" | cut -f2)
local error_msg=$(echo "$service_status" | cut -f3) local error_msg=$(echo "$service_status" | cut -f3)
log "Current Authelia state: $state"
# Check for Running state # Check for Running state
if echo "$state" | grep -q "Running"; then if echo "$state" | grep -q "Running"; then
# Verify it's actually stable by checking for a few seconds # Verify image hash
if [ "$last_status" = "Running" ]; then
# Double-check: no recent failures
local failed_count
failed_count=$(docker stack ps "${CI_REPO_NAME}" | grep "authelia_authelia" | grep -c "Failed" || echo "0")
if [ $failed_count -eq 0 ]; then
# Final verification: ensure we're using the new image
local current_image local current_image
current_image=$(docker stack ps "${CI_REPO_NAME}" --format "{{.Image}}" | grep authelia | head -n1) current_image=$(docker stack ps "${CI_REPO_NAME}" --format "{{.Image}}" | grep authelia | head -n1)
log "🎯 Current image: $current_image"
log "🎯 Expected image hash: $NEW_IMAGE_HASH"
if echo "$current_image" | grep -q "$NEW_IMAGE_HASH" || [ -z "$NEW_IMAGE_HASH" ]; then if echo "$current_image" | grep -q "$NEW_IMAGE_HASH" || [ -z "$NEW_IMAGE_HASH" ]; then
success "✅ Authelia service is healthy and running!" success "✅ Authelia service is healthy and running with correct image!"
success "🎯 Using correct image: $current_image" success "🎯 Using image: $current_image"
success "⚡ Total deployment time: ${elapsed} seconds" success "⚡ Total deployment time: ${elapsed} seconds"
ROLLBACK_NEEDED=false
return 0 return 0
else else
warning "⚠️ Service running but using wrong image: $current_image (expected: $NEW_IMAGE_HASH)" warning "⚠️ Service running but using different image than expected"
warning "Current: $current_image"
warning "Expected hash: $NEW_IMAGE_HASH"
warning "This may be normal if the image hasn't changed"
fi fi
else
warning "⚠️ Service running but found $failed_count failed instances"
fi
fi
last_status="Running"
elif echo "$state" | grep -q "Failed\|Rejected\|Shutdown"; then elif echo "$state" | grep -q "Failed\|Rejected\|Shutdown"; then
error "❌ Service failed: $state" warning "❌ Service failed: $state"
if [ -n "$error_msg" ]; then if [ -n "$error_msg" ]; then
error "Error: $error_msg" error "Error: $error_msg"
fi fi
break # Exit early on clear failure # Get recent logs for debugging
log "📋 Getting recent logs for debugging..."
docker service logs "${CI_REPO_NAME}_authelia" --tail 20 2>/dev/null || echo "No logs available"
else else
last_status="$state" debug "Service state: $state (still starting up)"
debug "Service state: $state"
fi fi
fi fi
if [ $elapsed -ge $timeout ]; then if [ $elapsed -ge $timeout ]; then
warning "⏰ Reached timeout after ${elapsed} seconds"
log "📊 Final status for debugging:"
docker stack ps "${CI_REPO_NAME}" --no-trunc || true
break break
fi fi
sleep 2 sleep 10
check_count=$((check_count + 1)) check_count=$((check_count + 1))
done done
# Health check failed - provide comprehensive diagnostics # Deployment verification completed
error "❌ Health check failed after ${elapsed} seconds" warning "📊 Deployment verification completed - check logs above for status"
error "Deployment verification failed"
# Get detailed diagnostics for each service
log "🔍 Gathering comprehensive diagnostics..."
# Get final diagnostic info
log "🔍 Final diagnostics..."
local services="authelia mariadb redis" local services="authelia mariadb redis"
for service in $services; do for service in $services; do
if docker service ls --format "{{.Name}}" | grep -q "${CI_REPO_NAME}_${service}"; then if docker service ls --format "{{.Name}}" | grep -q "${CI_REPO_NAME}_${service}"; then
get_container_diagnostics "$service" log "=== ${service} STATUS ==="
else docker service logs "${CI_REPO_NAME}_${service}" --tail 10 2>/dev/null || echo "No logs available"
error "Service ${CI_REPO_NAME}_${service} not found!"
fi fi
done done
# Additional stack-level diagnostics # Don't fail - let it run for debugging
error "=== 📊 STACK-LEVEL DIAGNOSTICS ===" warning "Deployment may still be starting - leaving stack running for debugging"
error "Full stack status:" return 0
docker stack ps "${CI_REPO_NAME}" --no-trunc || true
error "Stack services:"
docker stack services "${CI_REPO_NAME}" || true
error "Recent Docker events:"
docker events --since="$((elapsed + 60))s" --until="now" --filter "container" 2>/dev/null | tail -10 || true
return 1
} }
# Main deployment function # Main deployment function