diff --git a/README.md b/README.md index cd6659c..fabdac6 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ A utility to create tarballs with a specific directory structure and built-in MD - Compatible with standard tar tools - Built-in MD5 hash verification - Automatic file integrity checks during extraction +- Pattern-based file exclusion for creating targeted archives ## Building @@ -56,6 +57,8 @@ Options: - `-source`: Source directory to compress (required) - `-output`: Output tarball filename (default: "output.tar.gz") - `-prefix`: Directory name that will contain all files in the tarball (default: "myapp") +- `-exclude`: Comma-separated list of patterns to exclude (e.g. "*.log,*.tmp,temp/") +- `-verbose`: Enable detailed output during operation ### Extract Mode ```bash @@ -67,6 +70,7 @@ Options: - `-output`: Tarball to extract (required) - `-extractdir`: Extraction directory (default: current directory) - `-verify`: Only verify hash integrity without extraction +- `-verbose`: Enable detailed output during operation ### Examples @@ -74,9 +78,15 @@ Options: # Create a tarball (macOS) ./bin/tarballer-darwin -source ./myproject -output release.tar.gz -prefix app +# Create a tarball excluding specific files +./bin/tarballer-darwin -source ./myproject -output release.tar.gz -prefix app -exclude "*.log,bin/,temp/" + # Extract and verify (Linux) ./bin/tarballer-linux -extract -output release.tar.gz -extractdir /path/to/extract +# Extract with verbose output +./bin/tarballer-linux -extract -output release.tar.gz -extractdir /path/to/extract -verbose + # Only verify integrity ./bin/tarballer-linux -extract -verify -output release.tar.gz ``` @@ -86,4 +96,18 @@ Options: 1. During creation, MD5 hashes are calculated for all files and stored in `.md5-manifest.txt` 2. During extraction, file hashes are verified against the manifest 3. The manifest file is removed after successful extraction -4. Extraction aborts with an error if verification fails \ No newline at end of file +4. Extraction aborts with an error if verification fails + +## Exclude Patterns + +The `-exclude` flag accepts a comma-separated list of patterns to exclude from the tarball: + +- Simple wildcards using `*` (matches any sequence of characters) and `?` (matches any single character) +- Directory patterns (ending with `/`) exclude entire directory trees +- File patterns can match by extension (e.g., `*.log`) or name + +Examples: +- `*.log` - Excludes all files with the .log extension +- `bin/` - Excludes the bin directory and all its contents +- `temp/,*.tmp` - Excludes the temp directory and all .tmp files +- `cache/*,*.bak` - Excludes all contents of the cache directory and all .bak files \ No newline at end of file diff --git a/bin/tarballer-darwin b/bin/tarballer-darwin index d962ec6..f38943b 100755 Binary files a/bin/tarballer-darwin and b/bin/tarballer-darwin differ diff --git a/bin/tarballer-freebsd b/bin/tarballer-freebsd index 8654014..bcbe3dc 100755 Binary files a/bin/tarballer-freebsd and b/bin/tarballer-freebsd differ diff --git a/bin/tarballer-linux b/bin/tarballer-linux index fb85b47..34f18b8 100755 Binary files a/bin/tarballer-linux and b/bin/tarballer-linux differ diff --git a/main.go b/main.go index e0c4de4..8ebf535 100644 --- a/main.go +++ b/main.go @@ -11,6 +11,7 @@ import ( "io" "os" "path/filepath" + "regexp" "strings" "time" ) @@ -26,6 +27,7 @@ func main() { extractDir := flag.String("extractdir", "", "Directory to extract to (default: current directory)") verifyOnly := flag.Bool("verify", false, "Only verify hash integrity without extraction") verboseMode := flag.Bool("verbose", false, "Enable verbose output") + excludePatterns := flag.String("exclude", "", "Comma-separated list of patterns to exclude (e.g. \"*.log,*.tmp,temp/\")") flag.Parse() if *extractMode { @@ -58,7 +60,29 @@ func main() { } - err := createTarball(*sourceDir, *outputFile, *prefixDir, *verboseMode) + // Process exclude patterns + var excludeRegexps []*regexp.Regexp + if *excludePatterns != "" { + patterns := strings.Split(*excludePatterns, ",") + for _, pattern := range patterns { + // Trim spaces + pattern = strings.TrimSpace(pattern) + if pattern == "" { + continue + } + + // Convert glob pattern to regexp + regexPattern := globToRegexp(pattern) + re, err := regexp.Compile(regexPattern) + if err != nil { + fmt.Printf("Invalid exclude pattern %q: %v\n", pattern, err) + os.Exit(1) + } + excludeRegexps = append(excludeRegexps, re) + } + } + + err := createTarball(*sourceDir, *outputFile, *prefixDir, excludeRegexps, *verboseMode) if err != nil { fmt.Printf("Error creating tarball: %v\n", err) os.Exit(1) @@ -68,6 +92,29 @@ func main() { } } +// globToRegexp converts a glob pattern (*.log) to a regexp pattern (.*\.log$) +func globToRegexp(pattern string) string { + // Escape special regexp chars that aren't special in glob + pattern = regexp.QuoteMeta(pattern) + + // Convert glob * to regex .* + pattern = strings.ReplaceAll(pattern, "\\*", ".*") + + // Convert glob ? to regex . + pattern = strings.ReplaceAll(pattern, "\\?", ".") + + // Ensure pattern matches the entire filename + if strings.HasSuffix(pattern, "/") { + // For directory patterns, match any path containing this directory + pattern = pattern + ".*" + } else { + // For file patterns, match at the end of the path + pattern = pattern + "$" + } + + return pattern +} + // calcFileMD5 calculates the MD5 hash of a file func calcFileMD5(filePath string) (string, error) { file, err := os.Open(filePath) @@ -84,7 +131,7 @@ func calcFileMD5(filePath string) (string, error) { return hex.EncodeToString(hash.Sum(nil)), nil } -func createTarball(sourceDir, outputFile, prefix string, verboseMode bool) error { +func createTarball(sourceDir, outputFile, prefix string, excludePatterns []*regexp.Regexp, verboseMode bool) error { // Resolve absolute path of source directory absSourceDir, err := filepath.Abs(sourceDir) if err != nil { @@ -97,6 +144,12 @@ func createTarball(sourceDir, outputFile, prefix string, verboseMode bool) error if prefix != "" { fmt.Printf("Using prefix: %s\n", prefix) } + if len(excludePatterns) > 0 { + fmt.Println("Using exclude patterns:") + for i, pattern := range excludePatterns { + fmt.Printf(" %d: %s\n", i+1, pattern) + } + } } // Create the output file @@ -120,6 +173,7 @@ func createTarball(sourceDir, outputFile, prefix string, verboseMode bool) error // Create a map to store file hashes hashes := make(map[string]string) fileCount := 0 + skippedCount := 0 // Walk through the source directory err = filepath.Walk(absSourceDir, func(path string, info os.FileInfo, err error) error { @@ -143,6 +197,23 @@ func createTarball(sourceDir, outputFile, prefix string, verboseMode bool) error return nil } + // Check if file matches any exclude patterns + if len(excludePatterns) > 0 { + relPathForward := filepath.ToSlash(relPath) + for _, pattern := range excludePatterns { + if pattern.MatchString(relPathForward) { + if verboseMode { + fmt.Printf("Excluding: %s (matched pattern)\n", relPathForward) + } + skippedCount++ + if info.IsDir() { + return filepath.SkipDir + } + return nil + } + } + } + // Add prefix if specified if prefix != "" { relPath = filepath.Join(prefix, relPath) @@ -233,7 +304,10 @@ func createTarball(sourceDir, outputFile, prefix string, verboseMode bool) error } if verboseMode { - fmt.Printf("Added %d files to tarball\n", fileCount) + fmt.Printf("Added %d files to tarball\n", fileCount-skippedCount) + if skippedCount > 0 { + fmt.Printf("Excluded %d files/directories\n", skippedCount) + } fmt.Println("Creating MD5 manifest...") } diff --git a/test/README.md b/test/README.md index 41d8329..358cc54 100644 --- a/test/README.md +++ b/test/README.md @@ -57,10 +57,47 @@ If you need to manually test the tarballer utility, you can: # Create a tarball ./bin/tarballer-darwin -source /path/to/source -output output.tar.gz -prefix myapp + # Testing exclude patterns + ./bin/tarballer-darwin -source /path/to/source -output output.tar.gz -prefix myapp -exclude "*.log,temp/" + + # Test with verbose output + ./bin/tarballer-darwin -source /path/to/source -output output.tar.gz -prefix myapp -verbose + # Extract and verify a tarball ./bin/tarballer-darwin -extract -output output.tar.gz -extractdir /path/to/extract ``` +## Testing Exclude Patterns + +To test the exclude patterns feature: + +1. Create a directory with various file types: + ```bash + mkdir -p test-dir/logs test-dir/bin test-dir/src + touch test-dir/file1.txt test-dir/file2.txt + touch test-dir/logs/app.log test-dir/logs/error.log + touch test-dir/bin/executable + touch test-dir/src/main.go test-dir/src/util.go + ``` + +2. Test with various exclude patterns: + ```bash + # Exclude all .log files + ./bin/tarballer-darwin -source test-dir -output test1.tar.gz -prefix test -exclude "*.log" -verbose + + # Exclude entire directories + ./bin/tarballer-darwin -source test-dir -output test2.tar.gz -prefix test -exclude "logs/,bin/" -verbose + + # Exclude multiple patterns + ./bin/tarballer-darwin -source test-dir -output test3.tar.gz -prefix test -exclude "*.log,*.go,bin/" -verbose + ``` + +3. Extract and verify that exclusions worked: + ```bash + ./bin/tarballer-darwin -extract -output test1.tar.gz -extractdir test1-extracted -verbose + find test1-extracted -type f | grep ".log" # Should return nothing + ``` + ## Modifying Tests When modifying tests, keep in mind that the test script uses the container's `/tmp` directory for all temporary files. This keeps the test process self-contained within the container. \ No newline at end of file diff --git a/test/test.sh b/test/test.sh index 90bbcec..4b0df41 100755 --- a/test/test.sh +++ b/test/test.sh @@ -27,6 +27,8 @@ cleanup_files() { rm -rf /tmp/standard-test /tmp/standard-extracted /tmp/reference-extracted rm -f /tmp/original-checksums.txt /tmp/standard-checksums.txt /tmp/reference-checksums.txt rm -f /workdir/complex.tar.gz /workdir/standard.tar.gz /workdir/reference.tar.gz + rm -rf /tmp/exclude-test /tmp/exclude-extracted + rm -f /workdir/exclude.tar.gz echo "Temporary files cleaned up" else echo "Keeping temporary files for inspection" @@ -160,6 +162,113 @@ run_basic_test() { return 0 } +run_exclude_patterns_test() { + echo "=== RUNNING EXCLUDE PATTERNS TEST ===" + + # Clean up test directories + rm -rf /tmp/exclude-test /tmp/exclude-extracted + rm -f /workdir/exclude.tar.gz + + # Create test directory structure + mkdir -p /tmp/exclude-test/logs /tmp/exclude-test/temp + mkdir -p /tmp/exclude-test/src/lib /tmp/exclude-test/bin + mkdir -p /tmp/exclude-test/data + + # Create various file types + echo "Main text file" > /tmp/exclude-test/main.txt + echo "Config file" > /tmp/exclude-test/config.ini + + # Log files (to be excluded with pattern *.log) + echo "Log file 1" > /tmp/exclude-test/logs/app.log + echo "Log file 2" > /tmp/exclude-test/logs/error.log + + # Temporary files (to be excluded with pattern temp/) + echo "Temp file 1" > /tmp/exclude-test/temp/cache.tmp + echo "Temp file 2" > /tmp/exclude-test/temp/session.tmp + + # Source files (some to be excluded with pattern *.go) + echo "Source file Go" > /tmp/exclude-test/src/main.go + echo "Source file C" > /tmp/exclude-test/src/helper.c + echo "Source file Go lib" > /tmp/exclude-test/src/lib/utils.go + echo "Source file C lib" > /tmp/exclude-test/src/lib/core.c + + # Binary files (to be excluded with pattern bin/) + generate_random_file "/tmp/exclude-test/bin/app" 1024 + generate_random_file "/tmp/exclude-test/bin/tool" 512 + + # Data files (not to be excluded) + generate_random_file "/tmp/exclude-test/data/data1.bin" 256 + generate_random_file "/tmp/exclude-test/data/data2.bin" 128 + + # List original structure + echo "=== ORIGINAL STRUCTURE ===" + find /tmp/exclude-test -type f | sort + echo "Total files: $(find /tmp/exclude-test -type f | wc -l)" + + # Test excluding *.log files + echo "=== TEST 1: EXCLUDING *.log FILES ===" + /bin/tarballer -source /tmp/exclude-test -output /workdir/exclude1.tar.gz -prefix test -exclude "*.log" -verbose + + mkdir -p /tmp/exclude-extracted/test1 + tar -xzf /workdir/exclude1.tar.gz -C /tmp/exclude-extracted/test1 + + echo "=== EXTRACTED STRUCTURE (WITHOUT LOGS) ===" + find /tmp/exclude-extracted/test1 -type f | sort + echo "Total files: $(find /tmp/exclude-extracted/test1 -type f | wc -l)" + + # Check that no .log files exist in the extracted archive + LOG_FILES=$(find /tmp/exclude-extracted/test1 -name "*.log" | wc -l) + if [ "$LOG_FILES" -eq 0 ]; then + echo "SUCCESS: No .log files found in the extracted archive" + else + echo "ERROR: Found .log files in the extracted archive" + return 1 + fi + + # Test excluding directories + echo "=== TEST 2: EXCLUDING DIRECTORIES (temp/ and bin/) ===" + /bin/tarballer -source /tmp/exclude-test -output /workdir/exclude2.tar.gz -prefix test -exclude "temp/,bin/" -verbose + + mkdir -p /tmp/exclude-extracted/test2 + tar -xzf /workdir/exclude2.tar.gz -C /tmp/exclude-extracted/test2 + + echo "=== EXTRACTED STRUCTURE (WITHOUT temp/ AND bin/) ===" + find /tmp/exclude-extracted/test2 -type f | sort + echo "Total files: $(find /tmp/exclude-extracted/test2 -type f | wc -l)" + + # Check that the excluded directories don't exist in the extracted archive + EXCLUDED_DIRS=$(find /tmp/exclude-extracted/test2 -path "*/temp" -o -path "*/bin" | wc -l) + if [ "$EXCLUDED_DIRS" -eq 0 ]; then + echo "SUCCESS: No temp/ or bin/ directories found in the extracted archive" + else + echo "ERROR: Found excluded directories in the extracted archive" + return 1 + fi + + # Test excluding multiple patterns + echo "=== TEST 3: EXCLUDING MULTIPLE PATTERNS (*.log, *.go, bin/) ===" + /bin/tarballer -source /tmp/exclude-test -output /workdir/exclude3.tar.gz -prefix test -exclude "*.log,*.go,bin/" -verbose + + mkdir -p /tmp/exclude-extracted/test3 + tar -xzf /workdir/exclude3.tar.gz -C /tmp/exclude-extracted/test3 + + echo "=== EXTRACTED STRUCTURE (WITH MULTIPLE EXCLUSIONS) ===" + find /tmp/exclude-extracted/test3 -type f | sort + echo "Total files: $(find /tmp/exclude-extracted/test3 -type f | wc -l)" + + # Check all exclusions + EXCLUDED_FILES=$(find /tmp/exclude-extracted/test3 -name "*.log" -o -name "*.go" -o -path "*/bin/*" | wc -l) + if [ "$EXCLUDED_FILES" -eq 0 ]; then + echo "SUCCESS: All excluded patterns are working correctly" + else + echo "ERROR: Found files that should have been excluded" + return 1 + fi + + echo "Exclude patterns test completed successfully!" + return 0 +} + run_tar_comparison_test() { echo "=== RUNNING TAR COMPARISON TEST ===" @@ -248,121 +357,128 @@ EOF # For each original file, find its corresponding extracted file and compare hashes while read -r line; do ORIG_HASH=$(echo "$line" | awk '{print $1}') - ORIG_FILE=$(echo "$line" | awk '{print $2}') + ORIG_FILE=$(echo "$line" | awk '{$1=""; print $0}' | sed 's/^ //') FILENAME=$(basename "$ORIG_FILE") - # Find the corresponding file in the extracted directory - EXTRACTED_FILE=$(find /tmp/standard-extracted -name "$FILENAME" | grep -v ".md5-manifest.txt" | head -1) - + # Look for the same file in the tarballer output + EXTRACTED_FILE=$(find /tmp/standard-extracted -name "$FILENAME" | head -1) if [ -n "$EXTRACTED_FILE" ]; then - # Get the hash of the extracted file EXTRACTED_HASH=$(grep "$EXTRACTED_FILE" /tmp/standard-checksums.txt | awk '{print $1}') - if [ "$ORIG_HASH" = "$EXTRACTED_HASH" ]; then MATCH_COUNT=$((MATCH_COUNT + 1)) else - echo "Hash mismatch for $FILENAME: original=$ORIG_HASH extracted=$EXTRACTED_HASH" + echo "HASH MISMATCH: $FILENAME" + echo "Original: $ORIG_HASH" + echo "Extracted: $EXTRACTED_HASH" fi + else + echo "File not found in extraction: $FILENAME" fi done < /tmp/original-checksums.txt if [ "$MATCH_COUNT" -eq "$EXPECTED_COUNT" ]; then - echo 'SUCCESS: Tarballer extraction hashes match original files!' + echo "SUCCESS: Tarballer extraction hashes match original files!" else - echo "ERROR: Hash mismatch detected! Matched $MATCH_COUNT of $EXPECTED_COUNT files." + echo "ERROR: Only $MATCH_COUNT of $EXPECTED_COUNT hashes match" return 1 fi - - # Similar check for reference tar extraction + + # Compare original files to standard tar extraction MATCH_COUNT=0 + # For each original file, find its corresponding extracted file and compare hashes while read -r line; do ORIG_HASH=$(echo "$line" | awk '{print $1}') - ORIG_FILE=$(echo "$line" | awk '{print $2}') + ORIG_FILE=$(echo "$line" | awk '{$1=""; print $0}' | sed 's/^ //') FILENAME=$(basename "$ORIG_FILE") - # Find the corresponding file in the extracted directory + # Look for the same file in the reference output EXTRACTED_FILE=$(find /tmp/reference-extracted -name "$FILENAME" | head -1) - if [ -n "$EXTRACTED_FILE" ]; then - # Get the hash of the extracted file EXTRACTED_HASH=$(grep "$EXTRACTED_FILE" /tmp/reference-checksums.txt | awk '{print $1}') - if [ "$ORIG_HASH" = "$EXTRACTED_HASH" ]; then MATCH_COUNT=$((MATCH_COUNT + 1)) else - echo "Hash mismatch for $FILENAME: original=$ORIG_HASH reference=$EXTRACTED_HASH" + echo "HASH MISMATCH (ref): $FILENAME" + echo "Original: $ORIG_HASH" + echo "Extracted: $EXTRACTED_HASH" fi + else + echo "File not found in reference extraction: $FILENAME" fi done < /tmp/original-checksums.txt if [ "$MATCH_COUNT" -eq "$EXPECTED_COUNT" ]; then - echo 'SUCCESS: Reference tar extraction hashes match original files!' + echo "SUCCESS: Reference tar extraction hashes match original files!" else - echo "ERROR: Hash mismatch detected! Matched $MATCH_COUNT of $EXPECTED_COUNT files." + echo "ERROR: Only $MATCH_COUNT of $EXPECTED_COUNT hashes match" return 1 fi - - echo '=== VERIFYING SYMLINKS ===' - echo 'ORIGINAL SYMLINKS:' + + # Test symlinks + echo "=== VERIFYING SYMLINKS ===" + echo "ORIGINAL SYMLINKS:" find /tmp/standard-test -type l -exec ls -la {} \; - echo 'EXTRACTED SYMLINKS:' + + echo "EXTRACTED SYMLINKS:" find /tmp/standard-extracted -type l -exec ls -la {} \; - - # Compare file counts to ensure all files were extracted - echo '=== FILE COUNT COMPARISON ===' - echo -n 'Original files: ' && find /tmp/standard-test -type f | wc -l - echo -n 'Extracted files: ' && find /tmp/standard-extracted -type f | wc -l - - # Test symlink functionality - echo '=== TESTING SYMLINK CONTENT ===' - echo 'Original linked content:' + + # Verify file counts + echo "=== FILE COUNT COMPARISON ===" + echo "Original files: $(find /tmp/standard-test -type f | wc -l)" + echo "Extracted files: $(find /tmp/standard-extracted -type f | wc -l)" + + # Test symlink content + echo "=== TESTING SYMLINK CONTENT ===" + echo "Original linked content:" cat /tmp/standard-test/data/config-link.json - echo 'Extracted linked content:' + + echo "Extracted linked content:" cat /tmp/standard-extracted/app/data/config-link.json - - echo 'Tar comparison test completed successfully!' + + echo "Tar comparison test completed successfully!" return 0 } -# Main script execution +# Main script logic +echo "=== RUNNING ALL TESTS ===" + case "$TEST_TYPE" in - "basic") + "basic") run_basic_test RESULT=$? - [ "$RESULT" -eq 0 ] && cleanup_files - exit $RESULT ;; - "tar") + "tar") run_tar_comparison_test RESULT=$? - [ "$RESULT" -eq 0 ] && cleanup_files - exit $RESULT ;; - "all") - echo "=== RUNNING ALL TESTS ===" - run_basic_test - BASIC_RESULT=$? - run_tar_comparison_test - TAR_RESULT=$? - - if [ $BASIC_RESULT -eq 0 ] && [ $TAR_RESULT -eq 0 ]; then - echo "✅ ALL TESTS PASSED SUCCESSFULLY!" - cleanup_files - exit 0 - else - echo "❌ SOME TESTS FAILED!" - exit 1 - fi + "exclude") + run_exclude_patterns_test + RESULT=$? ;; "clean") cleanup_files - exit 0 + RESULT=0 ;; - *) - echo "Unknown test type: $TEST_TYPE" - echo "Usage: $0 [basic|tar|all|clean] [keep_temp_files]" - echo " keep_temp_files: 0 (clean up, default) or 1 (keep temp files)" - exit 1 + "all"|*) + run_basic_test + RESULT1=$? + run_tar_comparison_test + RESULT2=$? + run_exclude_patterns_test + RESULT3=$? + RESULT=$((RESULT1 + RESULT2 + RESULT3)) ;; -esac \ No newline at end of file +esac + +if [ "$TEST_TYPE" != "clean" ]; then + cleanup_files +fi + +if [ "$RESULT" -eq 0 ]; then + echo "✅ ALL TESTS PASSED SUCCESSFULLY!" + exit 0 +else + echo "❌ TESTS FAILED WITH ERRORS!" + exit 1 +fi \ No newline at end of file