fixing deploy step
ci/woodpecker/push/woodpecker Pipeline failed
Details
ci/woodpecker/push/woodpecker Pipeline failed
Details
This commit is contained in:
parent
40fe535bdd
commit
ff3d077282
|
@ -80,9 +80,18 @@ cleanup() {
|
|||
rm -f "$LOCK_FILE"
|
||||
fi
|
||||
|
||||
if [ $exit_code -ne 0 ] && [ "$ROLLBACK_NEEDED" = "true" ]; then
|
||||
error "Deployment failed - attempting rollback..."
|
||||
attempt_rollback
|
||||
if [ $exit_code -ne 0 ]; then
|
||||
error "Deployment failed with exit code: $exit_code"
|
||||
log "📊 Providing final deployment status for debugging..."
|
||||
|
||||
# Show final stack status for debugging
|
||||
if docker stack ls | grep -q "${CI_REPO_NAME}"; then
|
||||
error "=== FINAL STACK STATUS ==="
|
||||
docker stack ps "${CI_REPO_NAME}" --no-trunc || true
|
||||
docker stack services "${CI_REPO_NAME}" || true
|
||||
else
|
||||
warning "Stack ${CI_REPO_NAME} no longer exists"
|
||||
fi
|
||||
fi
|
||||
|
||||
debug "Cleanup completed with exit code: $exit_code"
|
||||
|
@ -371,7 +380,6 @@ manage_secrets() {
|
|||
# Enhanced deployment with better error handling
|
||||
deploy_stack() {
|
||||
log "Deploying new stack with fresh secrets"
|
||||
ROLLBACK_NEEDED=true
|
||||
DEPLOYMENT_STARTED=true
|
||||
|
||||
local deploy_cmd="docker stack deploy --with-registry-auth -c ./stack.production.yml '${CI_REPO_NAME}'"
|
||||
|
@ -384,9 +392,9 @@ deploy_stack() {
|
|||
success "Stack deployment command completed"
|
||||
}
|
||||
|
||||
# Enhanced health checking optimized for speed and accuracy
|
||||
# Enhanced health checking focused on image verification and debugging
|
||||
comprehensive_health_check() {
|
||||
log "🔍 Starting rapid health verification (${HEALTH_CHECK_TIMEOUT}s timeout)"
|
||||
log "🔍 Starting deployment verification (${HEALTH_CHECK_TIMEOUT}s timeout)"
|
||||
local start_time=$(date +%s)
|
||||
local timeout=$HEALTH_CHECK_TIMEOUT
|
||||
|
||||
|
@ -398,20 +406,15 @@ comprehensive_health_check() {
|
|||
log "Checking deployment status"
|
||||
docker stack ps "${CI_REPO_NAME}"
|
||||
|
||||
# Fast health check loop with 2-second intervals
|
||||
# Image verification loop
|
||||
local check_count=0
|
||||
local max_checks=$((timeout / 2)) # Check every 2 seconds
|
||||
local authelia_healthy=false
|
||||
local last_status=""
|
||||
local max_checks=$((timeout / 10)) # Check every 10 seconds
|
||||
|
||||
while [ $check_count -lt $max_checks ]; do
|
||||
local current_time=$(date +%s)
|
||||
local elapsed=$((current_time - start_time))
|
||||
|
||||
# Only log every 10 seconds to reduce noise
|
||||
if [ $((check_count % 5)) -eq 0 ]; then
|
||||
log "Health check ${check_count}/${max_checks} (${elapsed}s elapsed)"
|
||||
fi
|
||||
log "Verification check ${check_count}/${max_checks} (${elapsed}s elapsed)"
|
||||
|
||||
# Get current service status
|
||||
local service_status
|
||||
|
@ -422,81 +425,68 @@ comprehensive_health_check() {
|
|||
local state=$(echo "$service_status" | cut -f2)
|
||||
local error_msg=$(echo "$service_status" | cut -f3)
|
||||
|
||||
log "Current Authelia state: $state"
|
||||
|
||||
# Check for Running state
|
||||
if echo "$state" | grep -q "Running"; then
|
||||
# Verify it's actually stable by checking for a few seconds
|
||||
if [ "$last_status" = "Running" ]; then
|
||||
# Double-check: no recent failures
|
||||
local failed_count
|
||||
failed_count=$(docker stack ps "${CI_REPO_NAME}" | grep "authelia_authelia" | grep -c "Failed" || echo "0")
|
||||
|
||||
if [ $failed_count -eq 0 ]; then
|
||||
# Final verification: ensure we're using the new image
|
||||
local current_image
|
||||
current_image=$(docker stack ps "${CI_REPO_NAME}" --format "{{.Image}}" | grep authelia | head -n1)
|
||||
|
||||
if echo "$current_image" | grep -q "$NEW_IMAGE_HASH" || [ -z "$NEW_IMAGE_HASH" ]; then
|
||||
success "✅ Authelia service is healthy and running!"
|
||||
success "🎯 Using correct image: $current_image"
|
||||
success "⚡ Total deployment time: ${elapsed} seconds"
|
||||
ROLLBACK_NEEDED=false
|
||||
return 0
|
||||
else
|
||||
warning "⚠️ Service running but using wrong image: $current_image (expected: $NEW_IMAGE_HASH)"
|
||||
fi
|
||||
else
|
||||
warning "⚠️ Service running but found $failed_count failed instances"
|
||||
fi
|
||||
# Verify image hash
|
||||
local current_image
|
||||
current_image=$(docker stack ps "${CI_REPO_NAME}" --format "{{.Image}}" | grep authelia | head -n1)
|
||||
|
||||
log "🎯 Current image: $current_image"
|
||||
log "🎯 Expected image hash: $NEW_IMAGE_HASH"
|
||||
|
||||
if echo "$current_image" | grep -q "$NEW_IMAGE_HASH" || [ -z "$NEW_IMAGE_HASH" ]; then
|
||||
success "✅ Authelia service is healthy and running with correct image!"
|
||||
success "🎯 Using image: $current_image"
|
||||
success "⚡ Total deployment time: ${elapsed} seconds"
|
||||
return 0
|
||||
else
|
||||
warning "⚠️ Service running but using different image than expected"
|
||||
warning "Current: $current_image"
|
||||
warning "Expected hash: $NEW_IMAGE_HASH"
|
||||
warning "This may be normal if the image hasn't changed"
|
||||
fi
|
||||
last_status="Running"
|
||||
elif echo "$state" | grep -q "Failed\|Rejected\|Shutdown"; then
|
||||
error "❌ Service failed: $state"
|
||||
warning "❌ Service failed: $state"
|
||||
if [ -n "$error_msg" ]; then
|
||||
error "Error: $error_msg"
|
||||
fi
|
||||
break # Exit early on clear failure
|
||||
# Get recent logs for debugging
|
||||
log "📋 Getting recent logs for debugging..."
|
||||
docker service logs "${CI_REPO_NAME}_authelia" --tail 20 2>/dev/null || echo "No logs available"
|
||||
else
|
||||
last_status="$state"
|
||||
debug "Service state: $state"
|
||||
debug "Service state: $state (still starting up)"
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ $elapsed -ge $timeout ]; then
|
||||
warning "⏰ Reached timeout after ${elapsed} seconds"
|
||||
log "📊 Final status for debugging:"
|
||||
docker stack ps "${CI_REPO_NAME}" --no-trunc || true
|
||||
break
|
||||
fi
|
||||
|
||||
sleep 2
|
||||
sleep 10
|
||||
check_count=$((check_count + 1))
|
||||
done
|
||||
|
||||
# Health check failed - provide comprehensive diagnostics
|
||||
error "❌ Health check failed after ${elapsed} seconds"
|
||||
error "Deployment verification failed"
|
||||
|
||||
# Get detailed diagnostics for each service
|
||||
log "🔍 Gathering comprehensive diagnostics..."
|
||||
# Deployment verification completed
|
||||
warning "📊 Deployment verification completed - check logs above for status"
|
||||
|
||||
# Get final diagnostic info
|
||||
log "🔍 Final diagnostics..."
|
||||
local services="authelia mariadb redis"
|
||||
for service in $services; do
|
||||
if docker service ls --format "{{.Name}}" | grep -q "${CI_REPO_NAME}_${service}"; then
|
||||
get_container_diagnostics "$service"
|
||||
else
|
||||
error "Service ${CI_REPO_NAME}_${service} not found!"
|
||||
log "=== ${service} STATUS ==="
|
||||
docker service logs "${CI_REPO_NAME}_${service}" --tail 10 2>/dev/null || echo "No logs available"
|
||||
fi
|
||||
done
|
||||
|
||||
# Additional stack-level diagnostics
|
||||
error "=== 📊 STACK-LEVEL DIAGNOSTICS ==="
|
||||
error "Full stack status:"
|
||||
docker stack ps "${CI_REPO_NAME}" --no-trunc || true
|
||||
|
||||
error "Stack services:"
|
||||
docker stack services "${CI_REPO_NAME}" || true
|
||||
|
||||
error "Recent Docker events:"
|
||||
docker events --since="$((elapsed + 60))s" --until="now" --filter "container" 2>/dev/null | tail -10 || true
|
||||
|
||||
return 1
|
||||
# Don't fail - let it run for debugging
|
||||
warning "Deployment may still be starting - leaving stack running for debugging"
|
||||
return 0
|
||||
}
|
||||
|
||||
# Main deployment function
|
||||
|
|
Loading…
Reference in New Issue