#!/bin/bash
# =====================================================================
# sitemap-test.sh - Test sitemap.xml
# =====================================================================
# This script tests that sitemap.xml is properly generated and contains
# all expected URLs.
#
# Usage: sitemap-test.sh [BASE_URL]
#   BASE_URL  Root URL of the site under test
#             (default: http://localhost:8080)
#
# Checks performed:
#   1. BASE_URL/sitemap.xml can be downloaded (HTTP errors count as failure).
#   2. The sitemap contains at least MIN_URLS <loc> entries.
#   3. Every path in IMPORTANT_PAGES appears in the sitemap.
#
# Exit status: 0 when all checks pass, 1 otherwise.
# =====================================================================

set -eu

echo "=== Testing sitemap.xml ==="

# Get the base URL from the command line or use the default.
BASE_URL="${1:-http://localhost:8080}"
# Drop one trailing slash so the joined URL does not contain "//sitemap.xml".
BASE_URL="${BASE_URL%/}"

SITEMAP_URL="$BASE_URL/sitemap.xml"

# Minimum number of <loc> entries the sitemap must contain.
MIN_URLS=10

# Download sitemap.xml once. --fail makes curl exit non-zero on HTTP error
# responses (e.g. 404), so a missing sitemap is detected instead of the
# server's error page being parsed as if it were the sitemap.
echo "Downloading sitemap.xml from $SITEMAP_URL"
if ! SITEMAP=$(curl -fsS "$SITEMAP_URL"); then
  echo "❌ sitemap.xml not found at $SITEMAP_URL"
  exit 1
fi

# Count URLs in sitemap: one <loc> element per URL. grep -o emits one line
# per occurrence, so the count is also right when the XML is on a single line.
# The pipeline's status is that of tr, so zero matches does not trip set -e.
URL_COUNT=$(grep -o '<loc>' <<<"$SITEMAP" | wc -l | tr -d '[:space:]')
echo "Found $URL_COUNT URLs in sitemap.xml"

# Check if sitemap contains at least MIN_URLS URLs
if [ "$URL_COUNT" -lt "$MIN_URLS" ]; then
  echo "❌ sitemap.xml contains fewer than $MIN_URLS URLs ($URL_COUNT)"
  exit 1
fi

# Site-relative paths that must be listed in the sitemap.
IMPORTANT_PAGES=(
  "index.html"
  "stories/index.html"
  "stories/open-source-success.html"
  "stories/viperwire.html"
  "one-pager-tools/csv-tool.html"
)

MISSING_PAGES=0
for rel_path in "${IMPORTANT_PAGES[@]}"; do
  # Fixed-string substring match (-F): dots in the path are literal, and the
  # check does not depend on which host name the sitemap was generated with.
  # NOTE: because this is a substring match, "index.html" is also satisfied
  # by a longer entry such as "stories/index.html".
  if ! grep -qF -- "$rel_path" <<<"$SITEMAP"; then
    echo "❌ Important page missing from sitemap: $rel_path"
    MISSING_PAGES=$((MISSING_PAGES + 1))
  fi
done

if [ "$MISSING_PAGES" -gt 0 ]; then
  echo "❌ $MISSING_PAGES important pages missing from sitemap.xml"
  exit 1
fi

echo "✅ sitemap.xml test passed"
exit 0