#!/bin/sh
#
# Integration tests for /bin/tarballer.
#
# Usage: <script> [TEST_TYPE] [KEEP_TEMP_FILES]
#   TEST_TYPE        basic | tar | exclude | clean | all   (default: all;
#                    any unrecognised value also runs all tests)
#   KEEP_TEMP_FILES  1 to keep temporary files, 0 to clean them up (default: 0)
#
# Exit status: 0 when every selected test passed, 1 otherwise.
#
# NOTE(review): `local` is not strictly POSIX but is supported by dash,
# busybox ash and bash; the original script already relied on it.

# Determine which test to run
TEST_TYPE=${1:-"all"}

# Set to 1 to keep temporary files, 0 to clean them up
KEEP_TEMP_FILES=${2:-0}

# Generate a file of exactly SIZE random bytes.
#   $1 - output path
#   $2 - size in bytes
generate_random_file() {
  local OUTPUT=$1
  local SIZE=$2
  dd if=/dev/urandom bs=1 count="$SIZE" of="$OUTPUT" 2>/dev/null
}

# Print a random alphanumeric string of LENGTH characters (no newline).
#   $1 - number of characters
# NOTE(review): everything after `tr -dc A-Za-z0-9` was lost in the source this
# was restored from; the /dev/urandom | head -c pipeline is a reconstruction.
generate_random_string() {
  local LENGTH=$1
  tr -dc A-Za-z0-9 </dev/urandom | head -c "$LENGTH"
}

# Print only the MD5 digest of a file (no file name).
#   $1 - file to hash
md5_of() {
  md5sum <"$1" | awk '{print $1}'
}

# Create an archive with tarballer and report a failure explicitly, so a broken
# tarballer run can never be mistaken for a passing test.
#   $1 - source directory
#   $2 - archive path to write
#   $3 - prefix directory inside the archive
#   $4... - extra tarballer arguments (e.g. -exclude "*.log" -verbose)
make_tarball() {
  local SOURCE_DIR=$1
  local ARCHIVE=$2
  local PREFIX=$3
  shift 3
  if ! /bin/tarballer -source "$SOURCE_DIR" -output "$ARCHIVE" -prefix "$PREFIX" "$@"; then
    echo "ERROR: tarballer failed to create $ARCHIVE"
    return 1
  fi
}

# Extract a gzip-compressed tarball into a (possibly new) directory.
#   $1 - archive path
#   $2 - destination directory
extract_tarball() {
  local ARCHIVE=$1
  local DEST_DIR=$2
  if ! mkdir -p "$DEST_DIR" || ! tar -xzf "$ARCHIVE" -C "$DEST_DIR"; then
    echo "ERROR: failed to extract $ARCHIVE into $DEST_DIR"
    return 1
  fi
}

# Fail unless the given regular file exists. Used to make sure an exclusion
# test did not "pass" simply because the archive was empty.
#   $1 - path that must exist
require_file() {
  if [ ! -f "$1" ]; then
    echo "ERROR: Expected file $1 is missing from the extracted archive"
    return 1
  fi
}

# Remove everything the tests create under /tmp unless KEEP_TEMP_FILES is 1.
# NOTE(review): the original body of this function was lost; this version is
# reconstructed from the paths the tests below create. Archives written to
# /workdir are deliberately left alone - confirm against the original.
cleanup_files() {
  if [ "$KEEP_TEMP_FILES" = "1" ]; then
    echo "Keeping temporary files"
    return 0
  fi
  rm -rf /tmp/complex /tmp/complex-extracted \
    /tmp/exclude-test /tmp/exclude-extracted \
    /tmp/standard-test /tmp/standard-extracted /tmp/reference-extracted
  rm -f /tmp/complex-original-md5.txt /tmp/complex-extracted-md5.txt \
    /tmp/original-checksums.txt /tmp/standard-checksums.txt \
    /tmp/reference-checksums.txt
}

# Build a nested directory tree, archive it with tarballer, extract it and
# verify that every regular file has the same MD5 hash at the same relative
# path below the "complex-app" prefix.
# Returns 0 on success, 1 on any failure.
run_basic_test() {
  local ALL_MATCH ORIG_HASH SOURCE_FILE FILENAME EXTRACTED_FILE EXTRACTED_HASH

  # NOTE(review): the start of this function (banner, cleanup, mkdir and the
  # text written to rootfile.txt) was lost and is reconstructed from the paths
  # used below - confirm against the original.
  echo "=== RUNNING BASIC TEST ==="

  # Clean up leftovers from previous runs (ln -s below fails on an existing link)
  rm -rf /tmp/complex /tmp/complex-extracted
  rm -f /tmp/complex-original-md5.txt /tmp/complex-extracted-md5.txt
  rm -f /workdir/complex.tar.gz

  # Create the directory structure
  mkdir -p /tmp/complex/dir1/subdir1/subsubdir1
  mkdir -p /tmp/complex/dir1/subdir2
  mkdir -p /tmp/complex/dir2

  # Create files at different levels
  echo "root level file ($(generate_random_string 8))" > /tmp/complex/rootfile.txt
  echo "level 1 file in dir1 ($(generate_random_string 12))" > /tmp/complex/dir1/file1.txt
  echo "level 1 file in dir2 ($(generate_random_string 16))" > /tmp/complex/dir2/file2.txt
  echo "level 2 file in subdir1 ($(generate_random_string 10))" > /tmp/complex/dir1/subdir1/file3.txt
  echo "level 2 file in subdir2 ($(generate_random_string 14))" > /tmp/complex/dir1/subdir2/file4.txt
  echo "level 3 file in subsubdir1 ($(generate_random_string 20))" > /tmp/complex/dir1/subdir1/subsubdir1/file5.txt

  # Add random binary files of different sizes
  generate_random_file "/tmp/complex/random_binary_small.bin" 512
  generate_random_file "/tmp/complex/dir1/random_binary_medium.bin" 2048
  generate_random_file "/tmp/complex/dir2/random_binary_large.bin" 8192

  # Create a symbolic link with a relative target. The target is resolved
  # relative to the link's own directory, so no cd is needed.
  ln -s ../rootfile.txt /tmp/complex/dir2/symlink.txt

  # Calculate MD5 hashes of original files for verification
  find /tmp/complex -type f | sort | xargs md5sum > /tmp/complex-original-md5.txt

  # Print the original structure for reference
  echo '=== ORIGINAL DIRECTORY STRUCTURE ==='
  find /tmp/complex -type f -o -type l | sort

  # Create and extract the tarball
  make_tarball /tmp/complex /workdir/complex.tar.gz complex-app || return 1
  extract_tarball /workdir/complex.tar.gz /tmp/complex-extracted || return 1

  # Verify the extracted structure
  echo '=== EXTRACTED DIRECTORY STRUCTURE ==='
  find /tmp/complex-extracted -type f -o -type l | sort

  # Calculate MD5 hashes of extracted files (kept for the report below)
  find /tmp/complex-extracted -type f | sort | xargs md5sum > /tmp/complex-extracted-md5.txt

  # Compare file content
  echo '=== VERIFYING FILE CONTENTS ==='
  cat /tmp/complex/rootfile.txt
  echo ' <-- Original: rootfile.txt'
  cat /tmp/complex-extracted/complex-app/rootfile.txt
  echo ' <-- Extracted: rootfile.txt'
  cat /tmp/complex/dir1/subdir1/subsubdir1/file5.txt
  echo ' <-- Original: deep nested file5.txt'
  cat /tmp/complex-extracted/complex-app/dir1/subdir1/subsubdir1/file5.txt
  echo ' <-- Extracted: deep nested file5.txt'

  # Verify binary file MD5 hashes specifically
  echo '=== VERIFYING BINARY FILE MD5 HASHES ==='
  md5sum /tmp/complex/random_binary_small.bin
  md5sum /tmp/complex-extracted/complex-app/random_binary_small.bin
  md5sum /tmp/complex/dir1/random_binary_medium.bin
  md5sum /tmp/complex-extracted/complex-app/dir1/random_binary_medium.bin
  md5sum /tmp/complex/dir2/random_binary_large.bin
  md5sum /tmp/complex-extracted/complex-app/dir2/random_binary_large.bin

  # Test symlink
  echo '=== TESTING SYMLINK ==='
  ls -la /tmp/complex/dir2/symlink.txt
  ls -la /tmp/complex-extracted/complex-app/dir2/symlink.txt

  # Verify MD5 hashes
  echo '=== MD5 HASH VERIFICATION ==='
  echo 'Original file hashes:'
  cat /tmp/complex-original-md5.txt
  echo 'Extracted file hashes:'
  cat /tmp/complex-extracted-md5.txt

  # Verify hash comparison
  echo '=== COMPARING FILE HASHES ==='

  # For each original file, hash the file at the SAME relative path in the
  # extraction. Matching by relative path (instead of by basename plus grep)
  # cannot pair up the wrong files when names repeat or contain regex
  # metacharacters.
  ALL_MATCH=1
  while read -r ORIG_HASH SOURCE_FILE; do
    [ -n "$SOURCE_FILE" ] || continue
    FILENAME=$(basename "$SOURCE_FILE")
    EXTRACTED_FILE="/tmp/complex-extracted/complex-app/${SOURCE_FILE#/tmp/complex/}"

    if [ ! -f "$EXTRACTED_FILE" ]; then
      echo "ERROR: File $FILENAME not found in extracted directory"
      ALL_MATCH=0
      continue
    fi

    EXTRACTED_HASH=$(md5_of "$EXTRACTED_FILE")
    if [ "$ORIG_HASH" != "$EXTRACTED_HASH" ]; then
      echo "ERROR: Hash mismatch for $FILENAME: original=$ORIG_HASH extracted=$EXTRACTED_HASH"
      ALL_MATCH=0
    fi
  done < /tmp/complex-original-md5.txt

  if [ "$ALL_MATCH" -eq 1 ]; then
    echo 'SUCCESS: All file hashes match between original and extracted files!'
  else
    echo 'ERROR: Hash mismatch detected!'
    return 1
  fi

  echo 'Basic test completed successfully!'
  return 0
}

# Exercise tarballer's -exclude option with a file glob, directory patterns and
# a combination of both. Each run must (a) succeed, (b) still contain main.txt,
# which no pattern matches, and (c) contain none of the excluded entries.
# Returns 0 on success, 1 on any failure.
run_exclude_patterns_test() {
  local LOG_FILES EXCLUDED_DIRS EXCLUDED_FILES

  echo "=== RUNNING EXCLUDE PATTERNS TEST ==="

  # Clean up test directories and the archives this test writes
  rm -rf /tmp/exclude-test /tmp/exclude-extracted
  rm -f /workdir/exclude.tar.gz /workdir/exclude1.tar.gz \
    /workdir/exclude2.tar.gz /workdir/exclude3.tar.gz

  # Create test directory structure
  mkdir -p /tmp/exclude-test/logs /tmp/exclude-test/temp
  mkdir -p /tmp/exclude-test/src/lib /tmp/exclude-test/bin
  mkdir -p /tmp/exclude-test/data

  # Create various file types
  echo "Main text file" > /tmp/exclude-test/main.txt
  echo "Config file" > /tmp/exclude-test/config.ini

  # Log files (to be excluded with pattern *.log)
  echo "Log file 1" > /tmp/exclude-test/logs/app.log
  echo "Log file 2" > /tmp/exclude-test/logs/error.log

  # Temporary files (to be excluded with pattern temp/)
  echo "Temp file 1" > /tmp/exclude-test/temp/cache.tmp
  echo "Temp file 2" > /tmp/exclude-test/temp/session.tmp

  # Source files (some to be excluded with pattern *.go)
  echo "Source file Go" > /tmp/exclude-test/src/main.go
  echo "Source file C" > /tmp/exclude-test/src/helper.c
  echo "Source file Go lib" > /tmp/exclude-test/src/lib/utils.go
  echo "Source file C lib" > /tmp/exclude-test/src/lib/core.c

  # Binary files (to be excluded with pattern bin/)
  generate_random_file "/tmp/exclude-test/bin/app" 1024
  generate_random_file "/tmp/exclude-test/bin/tool" 512

  # Data files (not to be excluded)
  generate_random_file "/tmp/exclude-test/data/data1.bin" 256
  generate_random_file "/tmp/exclude-test/data/data2.bin" 128

  # List original structure
  echo "=== ORIGINAL STRUCTURE ==="
  find /tmp/exclude-test -type f | sort
  echo "Total files: $(find /tmp/exclude-test -type f | wc -l)"

  # Test excluding *.log files
  echo "=== TEST 1: EXCLUDING *.log FILES ==="
  make_tarball /tmp/exclude-test /workdir/exclude1.tar.gz test -exclude "*.log" -verbose || return 1
  extract_tarball /workdir/exclude1.tar.gz /tmp/exclude-extracted/test1 || return 1

  echo "=== EXTRACTED STRUCTURE (WITHOUT LOGS) ==="
  find /tmp/exclude-extracted/test1 -type f | sort
  echo "Total files: $(find /tmp/exclude-extracted/test1 -type f | wc -l)"

  # An empty archive would trivially contain no .log files; require a kept file.
  require_file /tmp/exclude-extracted/test1/test/main.txt || return 1

  # Check that no .log files exist in the extracted archive
  LOG_FILES=$(find /tmp/exclude-extracted/test1 -name "*.log" | wc -l)
  if [ "$LOG_FILES" -eq 0 ]; then
    echo "SUCCESS: No .log files found in the extracted archive"
  else
    echo "ERROR: Found .log files in the extracted archive"
    return 1
  fi

  # Test excluding directories
  echo "=== TEST 2: EXCLUDING DIRECTORIES (temp/ and bin/) ==="
  make_tarball /tmp/exclude-test /workdir/exclude2.tar.gz test -exclude "temp/,bin/" -verbose || return 1
  extract_tarball /workdir/exclude2.tar.gz /tmp/exclude-extracted/test2 || return 1

  echo "=== EXTRACTED STRUCTURE (WITHOUT temp/ AND bin/) ==="
  find /tmp/exclude-extracted/test2 -type f | sort
  echo "Total files: $(find /tmp/exclude-extracted/test2 -type f | wc -l)"

  require_file /tmp/exclude-extracted/test2/test/main.txt || return 1

  # Check that the excluded directories don't exist in the extracted archive
  EXCLUDED_DIRS=$(find /tmp/exclude-extracted/test2 -path "*/temp/*" -o -path "*/bin/*" | wc -l)
  if [ "$EXCLUDED_DIRS" -eq 0 ]; then
    echo "SUCCESS: No contents of temp/ or bin/ directories found in the extracted archive"
  else
    echo "ERROR: Found contents of excluded directories in the extracted archive"
    return 1
  fi

  # Test excluding multiple patterns
  echo "=== TEST 3: EXCLUDING MULTIPLE PATTERNS (*.log, *.go, bin/) ==="
  make_tarball /tmp/exclude-test /workdir/exclude3.tar.gz test -exclude "*.log,*.go,bin/" -verbose || return 1
  extract_tarball /workdir/exclude3.tar.gz /tmp/exclude-extracted/test3 || return 1

  echo "=== EXTRACTED STRUCTURE (WITH MULTIPLE EXCLUSIONS) ==="
  find /tmp/exclude-extracted/test3 -type f | sort
  echo "Total files: $(find /tmp/exclude-extracted/test3 -type f | wc -l)"

  require_file /tmp/exclude-extracted/test3/test/main.txt || return 1

  # Check all exclusions
  EXCLUDED_FILES=$(find /tmp/exclude-extracted/test3 -name "*.log" -o -name "*.go" -o -path "*/bin/*" | wc -l)
  if [ "$EXCLUDED_FILES" -eq 0 ]; then
    echo "SUCCESS: All excluded patterns are working correctly"
  else
    echo "ERROR: Found files that should have been excluded"
    return 1
  fi

  echo "Exclude patterns test completed successfully!"
  return 0
}

# Count how many files listed in /tmp/original-checksums.txt have an identical
# copy at the same relative path below DEST_ROOT. The result is stored in the
# global MATCH_COUNT; a diagnostic is printed for each missing or differing file.
#   $1 - directory that mirrors /tmp/standard-test (e.g. <extract dir>/app)
#   $2 - heading printed before a mismatch report (file name is appended)
#   $3 - heading printed when a file is missing (file name is appended)
count_matching_hashes() {
  local DEST_ROOT=$1
  local MISMATCH_LABEL=$2
  local MISSING_LABEL=$3
  local ORIG_HASH ORIG_FILE FILENAME EXTRACTED_FILE EXTRACTED_HASH

  MATCH_COUNT=0
  while read -r ORIG_HASH ORIG_FILE; do
    [ -n "$ORIG_FILE" ] || continue
    FILENAME=$(basename "$ORIG_FILE")
    EXTRACTED_FILE="$DEST_ROOT/${ORIG_FILE#/tmp/standard-test/}"

    if [ -f "$EXTRACTED_FILE" ]; then
      EXTRACTED_HASH=$(md5_of "$EXTRACTED_FILE")
      if [ "$ORIG_HASH" = "$EXTRACTED_HASH" ]; then
        MATCH_COUNT=$((MATCH_COUNT + 1))
      else
        echo "$MISMATCH_LABEL: $FILENAME"
        echo "Original: $ORIG_HASH"
        echo "Extracted: $EXTRACTED_HASH"
      fi
    else
      echo "$MISSING_LABEL: $FILENAME"
    fi
  done < /tmp/original-checksums.txt
}

# Archive the same tree with tarballer and with standard tar, extract both and
# verify that each extraction reproduces the original files' MD5 hashes.
# Returns 0 on success, 1 on any failure.
run_tar_comparison_test() {
  local EXPECTED_COUNT

  echo "=== RUNNING TAR COMPARISON TEST ==="

  # Clean up test directories
  rm -rf /tmp/standard-test /tmp/standard-extracted /tmp/reference-extracted
  rm -f /tmp/original-checksums.txt /tmp/standard-checksums.txt /tmp/reference-checksums.txt

  # Create a diverse test directory structure
  mkdir -p /tmp/standard-test/config/settings
  mkdir -p /tmp/standard-test/data/user/documents
  mkdir -p /tmp/standard-test/data/user/pictures
  mkdir -p /tmp/standard-test/logs

  # Create various file types with random content
  echo "{\"app\": \"tarballer\", \"version\": \"1.0\", \"random_data\": \"$(generate_random_string 32)\"}" > /tmp/standard-test/config/settings/app.json
  # printf instead of echo: whether echo expands "\n" differs between shells.
  printf 'debug=true\nlog_level=%s\ndate_format="%s"\n' \
    "$(generate_random_string 5)" "$(generate_random_string 8)" \
    > /tmp/standard-test/config/settings/debug.conf

  # Create binary files of different sizes
  generate_random_file "/tmp/standard-test/data/user/documents/binary.dat" 10240
  generate_random_file "/tmp/standard-test/data/user/pictures/image1.raw" 5120
  generate_random_file "/tmp/standard-test/data/user/pictures/image2.raw" 7168

  # Create log files with random entries
  echo "Test log entry 1 - $(generate_random_string 16)" > /tmp/standard-test/logs/app.log
  echo "Test log entry 2 - $(generate_random_string 24)" >> /tmp/standard-test/logs/app.log
  echo "Test log entry 3 - $(generate_random_string 20)" >> /tmp/standard-test/logs/app.log

  # Create text file with random data
  generate_random_string 1024 > /tmp/standard-test/data/user/documents/text_file.txt

  # Create config file with mixed content
  cat << EOF > /tmp/standard-test/config/mixed_content.conf
# Configuration file with mixed content
SERVER_NAME=$(generate_random_string 12)
PORT=8080
MAX_CONNECTIONS=100
TIMEOUT=30
RANDOM_SEED=$(generate_random_string 64)
EOF

  # Create symlinks
  ln -s ../config/settings/app.json /tmp/standard-test/data/config-link.json
  ln -s ../../logs/app.log /tmp/standard-test/data/user/log-link.txt

  # Store MD5 hashes of original files for comparison
  find /tmp/standard-test -type f | sort | xargs md5sum > /tmp/original-checksums.txt

  # Create tarball using our utility
  make_tarball /tmp/standard-test /workdir/standard.tar.gz app || return 1

  # Create a reference tarball using standard tar for comparison
  if ! tar -czf /workdir/reference.tar.gz -C /tmp/standard-test --transform 's,^./,app/,' .; then
    echo "ERROR: failed to create reference tarball /workdir/reference.tar.gz"
    return 1
  fi

  # Extract tarballer output
  echo '=== TARBALLER OUTPUT ==='
  extract_tarball /workdir/standard.tar.gz /tmp/standard-extracted || return 1
  find /tmp/standard-extracted -type f -o -type l | sort

  # Extract reference tarball
  echo '=== REFERENCE TAR OUTPUT ==='
  extract_tarball /workdir/reference.tar.gz /tmp/reference-extracted || return 1
  find /tmp/reference-extracted -type f -o -type l | sort

  # Report checksums for all extracted files
  echo '=== CHECKSUMS OF EXTRACTED FILES ==='
  find /tmp/standard-extracted -type f | sort | xargs md5sum > /tmp/standard-checksums.txt
  find /tmp/reference-extracted -type f | sort | xargs md5sum > /tmp/reference-checksums.txt

  echo 'ORIGINAL FILE CHECKSUMS:'
  cat /tmp/original-checksums.txt
  echo 'TARBALLER EXTRACTED CHECKSUMS:'
  cat /tmp/standard-checksums.txt
  echo 'REFERENCE TAR EXTRACTED CHECKSUMS:'
  cat /tmp/reference-checksums.txt

  # Compare MD5 checksums systematically
  echo '=== SYSTEMATIC MD5 COMPARISON ==='
  EXPECTED_COUNT=$(wc -l < /tmp/original-checksums.txt)

  # Compare original files to tarballer extraction
  count_matching_hashes /tmp/standard-extracted/app \
    "HASH MISMATCH" "File not found in extraction"
  if [ "$MATCH_COUNT" -eq "$EXPECTED_COUNT" ]; then
    echo "SUCCESS: Tarballer extraction hashes match original files!"
  else
    echo "ERROR: Only $MATCH_COUNT of $EXPECTED_COUNT hashes match"
    return 1
  fi

  # Compare original files to standard tar extraction
  count_matching_hashes /tmp/reference-extracted/app \
    "HASH MISMATCH (ref)" "File not found in reference extraction"
  if [ "$MATCH_COUNT" -eq "$EXPECTED_COUNT" ]; then
    echo "SUCCESS: Reference tar extraction hashes match original files!"
  else
    echo "ERROR: Only $MATCH_COUNT of $EXPECTED_COUNT hashes match"
    return 1
  fi

  # Test symlinks
  echo "=== VERIFYING SYMLINKS ==="
  echo "ORIGINAL SYMLINKS:"
  find /tmp/standard-test -type l -exec ls -la {} \;
  echo "EXTRACTED SYMLINKS:"
  find /tmp/standard-extracted -type l -exec ls -la {} \;

  # Verify file counts
  echo "=== FILE COUNT COMPARISON ==="
  echo "Original files: $(find /tmp/standard-test -type f | wc -l)"
  echo "Extracted files: $(find /tmp/standard-extracted -type f | wc -l)"

  # Test symlink content
  echo "=== TESTING SYMLINK CONTENT ==="
  echo "Original linked content:"
  cat /tmp/standard-test/data/config-link.json
  echo "Extracted linked content:"
  cat /tmp/standard-extracted/app/data/config-link.json

  echo "Tar comparison test completed successfully!"
  return 0
}

# Main script logic
echo "=== RUNNING ALL TESTS ==="

case "$TEST_TYPE" in
  "basic")
    run_basic_test
    RESULT=$?
    ;;
  "tar")
    run_tar_comparison_test
    RESULT=$?
    ;;
  "exclude")
    run_exclude_patterns_test
    RESULT=$?
    ;;
  "clean")
    cleanup_files
    RESULT=0
    ;;
  "all" | *)
    # Run every test even if an earlier one fails; any non-zero sum is a failure.
    run_basic_test
    RESULT1=$?
    run_tar_comparison_test
    RESULT2=$?
    run_exclude_patterns_test
    RESULT3=$?
    RESULT=$((RESULT1 + RESULT2 + RESULT3))
    ;;
esac

# "clean" already ran cleanup_files above
if [ "$TEST_TYPE" != "clean" ]; then
  cleanup_files
fi

if [ "$RESULT" -eq 0 ]; then
  echo "✅ ALL TESTS PASSED SUCCESSFULLY!"
  exit 0
else
  echo "❌ TESTS FAILED WITH ERRORS!"
  exit 1
fi