tarballer/test/test.sh

#!/bin/sh

# Positional arguments:
#   $1 - which test to run; falls back to "all" when missing or empty
TEST_TYPE="${1:-all}"

#   $2 - 1 keeps the temporary files, 0 (the fallback) removes them
KEEP_TEMP_FILES="${2:-0}"

# Create a file filled with random bytes.
# Arguments:
#   $1 - path of the file to write (truncated if it already exists)
#   $2 - exact number of bytes to write
# Returns: exit status of the write.
generate_random_file() {
  local OUTPUT=$1
  local SIZE=$2
  # head -c copies the requested amount in bulk; the former dd bs=1 form
  # performed one read/write pair per byte.
  head -c "$SIZE" /dev/urandom > "$OUTPUT"
}

# Print a random alphanumeric string (A-Z, a-z, 0-9) on stdout, without a
# trailing newline.
# Arguments:
#   $1 - number of characters to print
generate_random_string() {
  local LENGTH=$1
  # LC_ALL=C makes tr treat /dev/urandom as plain bytes; in a multibyte
  # locale some tr implementations reject random input as invalid text.
  LC_ALL=C tr -dc 'A-Za-z0-9' </dev/urandom | head -c "$LENGTH"
}

# Remove every directory, checksum list and archive the tests create under
# /tmp and /workdir.
# Globals: KEEP_TEMP_FILES (read) - 0 (or unset) deletes the files, any other
#          number leaves them in place for inspection.
cleanup_files() {
  if [ "${KEEP_TEMP_FILES:-0}" -eq 0 ]; then
    echo "=== CLEANING UP TEMPORARY FILES ==="
    # Basic test artifacts
    rm -rf /tmp/complex /tmp/complex-extracted
    rm -f /tmp/complex-original-md5.txt /tmp/complex-extracted-md5.txt
    # Tar comparison test artifacts
    rm -rf /tmp/standard-test /tmp/standard-extracted /tmp/reference-extracted
    rm -f /tmp/original-checksums.txt /tmp/standard-checksums.txt /tmp/reference-checksums.txt
    rm -f /workdir/complex.tar.gz /workdir/standard.tar.gz /workdir/reference.tar.gz
    # Exclude patterns test artifacts. The test writes exclude1..3.tar.gz;
    # exclude.tar.gz is still removed in case an older run left it behind.
    rm -rf /tmp/exclude-test /tmp/exclude-extracted
    rm -f /workdir/exclude.tar.gz
    rm -f /workdir/exclude1.tar.gz /workdir/exclude2.tar.gz /workdir/exclude3.tar.gz
    echo "Temporary files cleaned up"
  else
    echo "Keeping temporary files for inspection"
  fi
}

# Round-trip test for /bin/tarballer.
#
# Builds a small tree under /tmp/complex (nested directories, text files with
# random suffixes, random binary files and one relative symlink), archives it
# with `-prefix complex-app`, extracts the archive with tar, and compares the
# MD5 of every original regular file with the file at the same relative path
# below the prefix directory of the extraction.
#
# Returns: 0 when every hash matches; 1 when tarballer or tar fails, a file
#          is missing, or any hash differs.
run_basic_test() {
  local SRC_DIR=/tmp/complex
  local OUT_DIR=/tmp/complex-extracted
  local PREFIX=complex-app
  local TARBALL=/workdir/complex.tar.gz
  local ORIG_MD5=/tmp/complex-original-md5.txt
  local EXTRACTED_MD5=/tmp/complex-extracted-md5.txt
  local ALL_MATCH ORIG_HASH SOURCE_FILE REL_PATH EXTRACTED_FILE EXTRACTED_HASH

  echo "=== RUNNING BASIC TEST ==="

  # Clean up existing test directories and any stale archive
  rm -rf "$SRC_DIR" "$OUT_DIR"
  rm -f "$ORIG_MD5" "$EXTRACTED_MD5" "$TARBALL"

  # Create complex directory structure
  mkdir -p "$SRC_DIR/dir1/subdir1/subsubdir1"
  mkdir -p "$SRC_DIR/dir1/subdir2"
  mkdir -p "$SRC_DIR/dir2/subdir1"

  # Create files at different levels with random content
  echo "root level file ($(generate_random_string 8))" > "$SRC_DIR/rootfile.txt"
  echo "level 1 file in dir1 ($(generate_random_string 12))" > "$SRC_DIR/dir1/file1.txt"
  echo "level 1 file in dir2 ($(generate_random_string 16))" > "$SRC_DIR/dir2/file2.txt"
  echo "level 2 file in subdir1 ($(generate_random_string 10))" > "$SRC_DIR/dir1/subdir1/file3.txt"
  echo "level 2 file in subdir2 ($(generate_random_string 14))" > "$SRC_DIR/dir1/subdir2/file4.txt"
  echo "level 3 file in subsubdir1 ($(generate_random_string 20))" > "$SRC_DIR/dir1/subdir1/subsubdir1/file5.txt"

  # Add random binary files of different sizes
  generate_random_file "$SRC_DIR/random_binary_small.bin" 512
  generate_random_file "$SRC_DIR/dir1/random_binary_medium.bin" 2048
  generate_random_file "$SRC_DIR/dir2/random_binary_large.bin" 8192

  # Create a symbolic link with a relative target. The link name is given as
  # a full path so the shell's working directory is left untouched.
  ln -s ../rootfile.txt "$SRC_DIR/dir2/symlink.txt"

  # Calculate MD5 hashes of original files for verification
  find "$SRC_DIR" -type f | sort | xargs md5sum > "$ORIG_MD5"

  # Print the original structure for reference
  echo '=== ORIGINAL DIRECTORY STRUCTURE ==='
  find "$SRC_DIR" -type f -o -type l | sort

  # Create the tarball; a failure here must fail the test rather than leave
  # the later steps to inspect an absent archive.
  if ! /bin/tarballer -source "$SRC_DIR" -output "$TARBALL" -prefix "$PREFIX"; then
    echo "ERROR: tarballer failed to create $TARBALL"
    return 1
  fi

  # Extract the tarball
  mkdir -p "$OUT_DIR"
  if ! tar -xzf "$TARBALL" -C "$OUT_DIR"; then
    echo "ERROR: failed to extract $TARBALL"
    return 1
  fi

  # Verify the extracted structure
  echo '=== EXTRACTED DIRECTORY STRUCTURE ==='
  find "$OUT_DIR" -type f -o -type l | sort

  # Calculate MD5 hashes of extracted files (printed below for reference)
  find "$OUT_DIR" -type f | sort | xargs md5sum > "$EXTRACTED_MD5"

  # Show a shallow and a deeply nested text file side by side
  echo '=== VERIFYING FILE CONTENTS ==='
  cat "$SRC_DIR/rootfile.txt"
  echo ' <-- Original: rootfile.txt'
  cat "$OUT_DIR/$PREFIX/rootfile.txt"
  echo ' <-- Extracted: rootfile.txt'

  cat "$SRC_DIR/dir1/subdir1/subsubdir1/file5.txt"
  echo ' <-- Original: deep nested file5.txt'
  cat "$OUT_DIR/$PREFIX/dir1/subdir1/subsubdir1/file5.txt"
  echo ' <-- Extracted: deep nested file5.txt'

  # Show binary file MD5 hashes specifically
  echo '=== VERIFYING BINARY FILE MD5 HASHES ==='
  md5sum "$SRC_DIR/random_binary_small.bin"
  md5sum "$OUT_DIR/$PREFIX/random_binary_small.bin"

  md5sum "$SRC_DIR/dir1/random_binary_medium.bin"
  md5sum "$OUT_DIR/$PREFIX/dir1/random_binary_medium.bin"

  md5sum "$SRC_DIR/dir2/random_binary_large.bin"
  md5sum "$OUT_DIR/$PREFIX/dir2/random_binary_large.bin"

  # Show the symlink before and after the round trip
  echo '=== TESTING SYMLINK ==='
  ls -la "$SRC_DIR/dir2/symlink.txt"
  ls -la "$OUT_DIR/$PREFIX/dir2/symlink.txt"

  # Print both hash lists
  echo '=== MD5 HASH VERIFICATION ==='
  echo 'Original file hashes:'
  cat "$ORIG_MD5"
  echo 'Extracted file hashes:'
  cat "$EXTRACTED_MD5"

  echo '=== COMPARING FILE HASHES ==='

  ALL_MATCH=1
  if [ ! -s "$ORIG_MD5" ]; then
    # An empty list would make the loop below pass without checking anything.
    echo "ERROR: No original file hashes were recorded"
    ALL_MATCH=0
  fi

  # Each line of the list is "<hash>  <path>". Map the original path to the
  # same relative path under the prefix directory instead of searching by
  # basename, so files sharing a name in different directories cannot be
  # confused with one another.
  while read -r ORIG_HASH SOURCE_FILE; do
    REL_PATH=${SOURCE_FILE#"$SRC_DIR"/}
    EXTRACTED_FILE="$OUT_DIR/$PREFIX/$REL_PATH"

    if [ ! -f "$EXTRACTED_FILE" ]; then
      echo "ERROR: File $REL_PATH not found in extracted directory"
      ALL_MATCH=0
      continue
    fi

    EXTRACTED_HASH=$(md5sum "$EXTRACTED_FILE" | awk '{print $1}')

    if [ "$ORIG_HASH" != "$EXTRACTED_HASH" ]; then
      echo "ERROR: Hash mismatch for $REL_PATH: original=$ORIG_HASH extracted=$EXTRACTED_HASH"
      ALL_MATCH=0
    fi
  done < "$ORIG_MD5"

  if [ "$ALL_MATCH" -eq 1 ]; then
    echo 'SUCCESS: All file hashes match between original and extracted files!'
  else
    echo 'ERROR: Hash mismatch detected!'
    return 1
  fi

  echo 'Basic test completed successfully!'
  return 0
}

# Archive /tmp/exclude-test with one -exclude value and unpack the result.
# Arguments:
#   $1 - case number N; the archive is /workdir/excludeN.tar.gz and the
#        extraction directory is /tmp/exclude-extracted/testN
#   $2 - value handed to tarballer's -exclude flag
# Returns: 0 on success; 1 (after printing an ERROR line) when tarballer or
#          tar exits non-zero.
_exclude_pack_and_extract() {
  local CASE_NO=$1
  local PATTERNS=$2
  local TARBALL="/workdir/exclude${CASE_NO}.tar.gz"
  local DEST="/tmp/exclude-extracted/test${CASE_NO}"

  if ! /bin/tarballer -source /tmp/exclude-test -output "$TARBALL" -prefix test -exclude "$PATTERNS" -verbose; then
    echo "ERROR: tarballer failed for exclude pattern(s): $PATTERNS"
    return 1
  fi

  mkdir -p "$DEST"
  if ! tar -xzf "$TARBALL" -C "$DEST"; then
    echo "ERROR: failed to extract $TARBALL"
    return 1
  fi
}

# Confirm that main.txt, which none of the exclude patterns used by this test
# matches, is present under the "test" prefix directory. Without this, an
# empty or failed extraction would satisfy every "no excluded files" check.
# Arguments:
#   $1 - extraction directory
_exclude_require_kept_file() {
  if [ ! -f "$1/test/main.txt" ]; then
    echo "ERROR: main.txt is missing from the extracted archive"
    return 1
  fi
}

# Exercise tarballer's -exclude flag with a file glob, with directory
# patterns, and with a comma-separated mix of both. Each case archives the
# same tree under /tmp/exclude-test, extracts it, and fails if anything that
# matches the excluded patterns is found in the extraction.
#
# Returns: 0 when all three cases pass, 1 on the first failure.
run_exclude_patterns_test() {
  local LOG_FILES EXCLUDED_DIRS EXCLUDED_FILES

  echo "=== RUNNING EXCLUDE PATTERNS TEST ==="

  # Clean up test directories and the archives written by a previous run
  rm -rf /tmp/exclude-test /tmp/exclude-extracted
  rm -f /workdir/exclude.tar.gz
  rm -f /workdir/exclude1.tar.gz /workdir/exclude2.tar.gz /workdir/exclude3.tar.gz

  # Create test directory structure
  mkdir -p /tmp/exclude-test/logs /tmp/exclude-test/temp
  mkdir -p /tmp/exclude-test/src/lib /tmp/exclude-test/bin
  mkdir -p /tmp/exclude-test/data

  # Create various file types
  echo "Main text file" > /tmp/exclude-test/main.txt
  echo "Config file" > /tmp/exclude-test/config.ini

  # Log files (to be excluded with pattern *.log)
  echo "Log file 1" > /tmp/exclude-test/logs/app.log
  echo "Log file 2" > /tmp/exclude-test/logs/error.log

  # Temporary files (to be excluded with pattern temp/)
  echo "Temp file 1" > /tmp/exclude-test/temp/cache.tmp
  echo "Temp file 2" > /tmp/exclude-test/temp/session.tmp

  # Source files (some to be excluded with pattern *.go)
  echo "Source file Go" > /tmp/exclude-test/src/main.go
  echo "Source file C" > /tmp/exclude-test/src/helper.c
  echo "Source file Go lib" > /tmp/exclude-test/src/lib/utils.go
  echo "Source file C lib" > /tmp/exclude-test/src/lib/core.c

  # Binary files (to be excluded with pattern bin/)
  generate_random_file "/tmp/exclude-test/bin/app" 1024
  generate_random_file "/tmp/exclude-test/bin/tool" 512

  # Data files (not to be excluded)
  generate_random_file "/tmp/exclude-test/data/data1.bin" 256
  generate_random_file "/tmp/exclude-test/data/data2.bin" 128

  # List original structure
  echo "=== ORIGINAL STRUCTURE ==="
  find /tmp/exclude-test -type f | sort
  echo "Total files: $(find /tmp/exclude-test -type f | wc -l)"

  # Test excluding *.log files
  echo "=== TEST 1: EXCLUDING *.log FILES ==="
  _exclude_pack_and_extract 1 "*.log" || return 1

  echo "=== EXTRACTED STRUCTURE (WITHOUT LOGS) ==="
  find /tmp/exclude-extracted/test1 -type f | sort
  echo "Total files: $(find /tmp/exclude-extracted/test1 -type f | wc -l)"

  _exclude_require_kept_file /tmp/exclude-extracted/test1 || return 1

  # Check that no .log files exist in the extracted archive
  LOG_FILES=$(find /tmp/exclude-extracted/test1 -name "*.log" | wc -l)
  if [ "$LOG_FILES" -eq 0 ]; then
    echo "SUCCESS: No .log files found in the extracted archive"
  else
    echo "ERROR: Found .log files in the extracted archive"
    return 1
  fi

  # Test excluding directories
  echo "=== TEST 2: EXCLUDING DIRECTORIES (temp/ and bin/) ==="
  _exclude_pack_and_extract 2 "temp/,bin/" || return 1

  echo "=== EXTRACTED STRUCTURE (WITHOUT temp/ AND bin/) ==="
  find /tmp/exclude-extracted/test2 -type f | sort
  echo "Total files: $(find /tmp/exclude-extracted/test2 -type f | wc -l)"

  _exclude_require_kept_file /tmp/exclude-extracted/test2 || return 1

  # Check that nothing below the excluded directories was archived
  EXCLUDED_DIRS=$(find /tmp/exclude-extracted/test2 -path "*/temp/*" -o -path "*/bin/*" | wc -l)
  if [ "$EXCLUDED_DIRS" -eq 0 ]; then
    echo "SUCCESS: No contents of temp/ or bin/ directories found in the extracted archive"
  else
    echo "ERROR: Found contents of excluded directories in the extracted archive"
    return 1
  fi

  # Test excluding multiple patterns
  echo "=== TEST 3: EXCLUDING MULTIPLE PATTERNS (*.log, *.go, bin/) ==="
  _exclude_pack_and_extract 3 "*.log,*.go,bin/" || return 1

  echo "=== EXTRACTED STRUCTURE (WITH MULTIPLE EXCLUSIONS) ==="
  find /tmp/exclude-extracted/test3 -type f | sort
  echo "Total files: $(find /tmp/exclude-extracted/test3 -type f | wc -l)"

  _exclude_require_kept_file /tmp/exclude-extracted/test3 || return 1

  # Check all exclusions
  EXCLUDED_FILES=$(find /tmp/exclude-extracted/test3 -name "*.log" -o -name "*.go" -o -path "*/bin/*" | wc -l)
  if [ "$EXCLUDED_FILES" -eq 0 ]; then
    echo "SUCCESS: All excluded patterns are working correctly"
  else
    echo "ERROR: Found files that should have been excluded"
    return 1
  fi

  echo "Exclude patterns test completed successfully!"
  return 0
}

# Compare every entry of /tmp/original-checksums.txt with the file at the
# same relative path inside an extracted copy of /tmp/standard-test.
# Arguments:
#   $1 - directory that corresponds to /tmp/standard-test in the extraction
#   $2 - tag appended to "HASH MISMATCH" lines (may be empty)
#   $3 - noun used in the "File not found in ..." line
#   $4 - name used at the start of the SUCCESS line
# Returns: 0 when every listed file exists and has the same MD5, 1 otherwise.
_compare_to_original_checksums() {
  local EXTRACTED_ROOT=$1
  local MISMATCH_TAG=$2
  local MISSING_NOUN=$3
  local SUCCESS_NAME=$4
  local MATCH_COUNT=0
  local EXPECTED_COUNT ORIG_HASH ORIG_FILE REL_PATH EXTRACTED_FILE EXTRACTED_HASH

  EXPECTED_COUNT=$(wc -l < /tmp/original-checksums.txt)

  # Each line is "<hash>  <path>". The extracted counterpart is located by
  # relative path, not by basename, so equally named files in different
  # directories are never mixed up.
  while read -r ORIG_HASH ORIG_FILE; do
    REL_PATH=${ORIG_FILE#/tmp/standard-test/}
    EXTRACTED_FILE="$EXTRACTED_ROOT/$REL_PATH"

    if [ -f "$EXTRACTED_FILE" ]; then
      EXTRACTED_HASH=$(md5sum "$EXTRACTED_FILE" | awk '{print $1}')
      if [ "$ORIG_HASH" = "$EXTRACTED_HASH" ]; then
        MATCH_COUNT=$((MATCH_COUNT + 1))
      else
        echo "HASH MISMATCH$MISMATCH_TAG: $REL_PATH"
        echo "Original: $ORIG_HASH"
        echo "Extracted: $EXTRACTED_HASH"
      fi
    else
      echo "File not found in $MISSING_NOUN: $REL_PATH"
    fi
  done < /tmp/original-checksums.txt

  if [ "$MATCH_COUNT" -eq "$EXPECTED_COUNT" ]; then
    echo "SUCCESS: $SUCCESS_NAME extraction hashes match original files!"
  else
    echo "ERROR: Only $MATCH_COUNT of $EXPECTED_COUNT hashes match"
    return 1
  fi
}

# Compare tarballer against standard tar.
#
# Builds a mixed tree under /tmp/standard-test (JSON, config, log, text and
# binary files plus two relative symlinks), archives it once with
# /bin/tarballer (-prefix app) and once with tar (--transform mapping ./ to
# app/), extracts both, and requires the MD5 of every original regular file
# to match its counterpart in each extraction. Symlinks and file counts are
# printed for inspection.
#
# Returns: 0 on success; 1 when an archive cannot be created or extracted, or
#          when any file is missing or differs.
run_tar_comparison_test() {
  echo "=== RUNNING TAR COMPARISON TEST ==="

  # Clean up test directories
  rm -rf /tmp/standard-test /tmp/standard-extracted /tmp/reference-extracted
  rm -f /tmp/original-checksums.txt /tmp/standard-checksums.txt /tmp/reference-checksums.txt

  # Create a diverse test directory structure
  mkdir -p /tmp/standard-test/config/settings
  mkdir -p /tmp/standard-test/data/user/documents
  mkdir -p /tmp/standard-test/data/user/pictures
  mkdir -p /tmp/standard-test/logs

  # Create various file types with random content
  echo "{\"app\": \"tarballer\", \"version\": \"1.0\", \"random_data\": \"$(generate_random_string 32)\"}" > /tmp/standard-test/config/settings/app.json
  # printf is used for the multi-line file: whether echo expands "\n" differs
  # between shells.
  printf 'debug=true\nlog_level=%s\ndate_format="%s"\n' \
    "$(generate_random_string 5)" "$(generate_random_string 8)" \
    > /tmp/standard-test/config/settings/debug.conf

  # Create binary files of different sizes
  generate_random_file "/tmp/standard-test/data/user/documents/binary.dat" 10240
  generate_random_file "/tmp/standard-test/data/user/pictures/image1.raw" 5120
  generate_random_file "/tmp/standard-test/data/user/pictures/image2.raw" 7168

  # Create log files with random entries
  echo "Test log entry 1 - $(generate_random_string 16)" > /tmp/standard-test/logs/app.log
  echo "Test log entry 2 - $(generate_random_string 24)" >> /tmp/standard-test/logs/app.log
  echo "Test log entry 3 - $(generate_random_string 20)" >> /tmp/standard-test/logs/app.log

  # Create text file with random data
  generate_random_string 1024 > /tmp/standard-test/data/user/documents/text_file.txt

  # Create config file with mixed content
  cat << EOF > /tmp/standard-test/config/mixed_content.conf
# Configuration file with mixed content
SERVER_NAME=$(generate_random_string 12)
PORT=8080
MAX_CONNECTIONS=100
TIMEOUT=30
RANDOM_SEED=$(generate_random_string 64)
EOF

  # Create symlinks (relative targets)
  ln -s ../config/settings/app.json /tmp/standard-test/data/config-link.json
  ln -s ../../logs/app.log /tmp/standard-test/data/user/log-link.txt

  # Store MD5 hashes of original files for comparison
  find /tmp/standard-test -type f | sort | xargs md5sum > /tmp/original-checksums.txt

  # Create tarball using our utility
  if ! /bin/tarballer -source /tmp/standard-test -output /workdir/standard.tar.gz -prefix app; then
    echo "ERROR: tarballer failed to create /workdir/standard.tar.gz"
    return 1
  fi

  # Create a reference tarball using standard tar for comparison. The dot is
  # escaped so only a literal leading "./" is rewritten to "app/".
  if ! tar -czf /workdir/reference.tar.gz -C /tmp/standard-test --transform 's,^\./,app/,' .; then
    echo "ERROR: tar failed to create /workdir/reference.tar.gz"
    return 1
  fi

  echo '=== TARBALLER OUTPUT ==='
  mkdir -p /tmp/standard-extracted
  if ! tar -xzf /workdir/standard.tar.gz -C /tmp/standard-extracted; then
    echo "ERROR: failed to extract /workdir/standard.tar.gz"
    return 1
  fi
  find /tmp/standard-extracted -type f -o -type l | sort

  # Extract reference tarball
  echo '=== REFERENCE TAR OUTPUT ==='
  mkdir -p /tmp/reference-extracted
  if ! tar -xzf /workdir/reference.tar.gz -C /tmp/reference-extracted; then
    echo "ERROR: failed to extract /workdir/reference.tar.gz"
    return 1
  fi
  find /tmp/reference-extracted -type f -o -type l | sort

  # Record checksums of all extracted files (printed below for reference)
  echo '=== CHECKSUMS OF EXTRACTED FILES ==='
  find /tmp/standard-extracted -type f | sort | xargs md5sum > /tmp/standard-checksums.txt
  find /tmp/reference-extracted -type f | sort | xargs md5sum > /tmp/reference-checksums.txt

  echo 'ORIGINAL FILE CHECKSUMS:'
  cat /tmp/original-checksums.txt

  echo 'TARBALLER EXTRACTED CHECKSUMS:'
  cat /tmp/standard-checksums.txt

  echo 'REFERENCE TAR EXTRACTED CHECKSUMS:'
  cat /tmp/reference-checksums.txt

  # Compare MD5 checksums systematically
  echo '=== SYSTEMATIC MD5 COMPARISON ==='

  # Original files vs. tarballer extraction
  _compare_to_original_checksums /tmp/standard-extracted/app \
    "" "extraction" "Tarballer" || return 1

  # Original files vs. standard tar extraction
  _compare_to_original_checksums /tmp/reference-extracted/app \
    " (ref)" "reference extraction" "Reference tar" || return 1

  # Show symlinks before and after
  echo "=== VERIFYING SYMLINKS ==="
  echo "ORIGINAL SYMLINKS:"
  find /tmp/standard-test -type l -exec ls -la {} \;

  echo "EXTRACTED SYMLINKS:"
  find /tmp/standard-extracted -type l -exec ls -la {} \;

  # Show file counts
  echo "=== FILE COUNT COMPARISON ==="
  echo "Original files: $(find /tmp/standard-test -type f | wc -l)"
  echo "Extracted files: $(find /tmp/standard-extracted -type f | wc -l)"

  # Show the content reached through a symlink in both trees
  echo "=== TESTING SYMLINK CONTENT ==="
  echo "Original linked content:"
  cat /tmp/standard-test/data/config-link.json

  echo "Extracted linked content:"
  cat /tmp/standard-extracted/app/data/config-link.json

  echo "Tar comparison test completed successfully!"
  return 0
}

# Main script logic: run the test(s) selected by TEST_TYPE, clean up, and
# exit 0 when everything passed or 1 when anything failed.
case "$TEST_TYPE" in
  "basic")
    run_basic_test
    RESULT=$?
    ;;
  "tar")
    run_tar_comparison_test
    RESULT=$?
    ;;
  "exclude")
    run_exclude_patterns_test
    RESULT=$?
    ;;
  "clean")
    # Cleanup only: no test ran, so exit without printing a pass/fail banner.
    cleanup_files
    exit 0
    ;;
  "all"|*)
    if [ "$TEST_TYPE" != "all" ]; then
      # Unrecognised names still run everything, but say so.
      echo "WARNING: unknown test type '$TEST_TYPE'; running all tests" >&2
    fi
    echo "=== RUNNING ALL TESTS ==="
    run_basic_test
    RESULT1=$?
    run_tar_comparison_test
    RESULT2=$?
    run_exclude_patterns_test
    RESULT3=$?
    # Each function returns 0 or 1, so a non-zero sum means a failure.
    RESULT=$((RESULT1 + RESULT2 + RESULT3))
    ;;
esac

cleanup_files

if [ "$RESULT" -eq 0 ]; then
  echo "✅ ALL TESTS PASSED SUCCESSFULLY!"
  exit 0
else
  echo "❌ TESTS FAILED WITH ERRORS!"
  exit 1
fi