Add exclude patterns functionality with tests and documentation

This commit is contained in:
Leopere 2025-03-21 14:56:47 -04:00
parent b546e84afd
commit fdc692c90c
7 changed files with 319 additions and 68 deletions

View File

@ -11,6 +11,7 @@ A utility to create tarballs with a specific directory structure and built-in MD
- Compatible with standard tar tools
- Built-in MD5 hash verification
- Automatic file integrity checks during extraction
- Pattern-based file exclusion for creating targeted archives
## Building
@ -56,6 +57,8 @@ Options:
- `-source`: Source directory to compress (required)
- `-output`: Output tarball filename (default: "output.tar.gz")
- `-prefix`: Directory name that will contain all files in the tarball (default: "myapp")
- `-exclude`: Comma-separated list of patterns to exclude (e.g. "*.log,*.tmp,temp/")
- `-verbose`: Enable detailed output during operation
### Extract Mode
```bash
@ -67,6 +70,7 @@ Options:
- `-output`: Tarball to extract (required)
- `-extractdir`: Extraction directory (default: current directory)
- `-verify`: Only verify hash integrity without extraction
- `-verbose`: Enable detailed output during operation
### Examples
@ -74,9 +78,15 @@ Options:
# Create a tarball (macOS)
./bin/tarballer-darwin -source ./myproject -output release.tar.gz -prefix app
# Create a tarball excluding specific files
./bin/tarballer-darwin -source ./myproject -output release.tar.gz -prefix app -exclude "*.log,bin/,temp/"
# Extract and verify (Linux)
./bin/tarballer-linux -extract -output release.tar.gz -extractdir /path/to/extract
# Extract with verbose output
./bin/tarballer-linux -extract -output release.tar.gz -extractdir /path/to/extract -verbose
# Only verify integrity
./bin/tarballer-linux -extract -verify -output release.tar.gz
```
@ -87,3 +97,17 @@ Options:
2. During extraction, file hashes are verified against the manifest
3. The manifest file is removed after successful extraction
4. Extraction aborts with an error if verification fails
## Exclude Patterns
The `-exclude` flag accepts a comma-separated list of patterns to exclude from the tarball:
- Simple wildcards using `*` (matches any sequence of characters, including `/`, so it can span directories) and `?` (matches any single character)
- Directory patterns (ending with `/`) exclude entire directory trees
- File patterns can match by extension (e.g., `*.log`) or name
Examples:
- `*.log` - Excludes all files with the .log extension
- `bin/` - Excludes the bin directory and all its contents
- `temp/,*.tmp` - Excludes the temp directory and all .tmp files
- `cache/*,*.bak` - Excludes all contents of the cache directory and all .bak files

Binary file not shown.

Binary file not shown.

Binary file not shown.

80
main.go
View File

@ -11,6 +11,7 @@ import (
"io"
"os"
"path/filepath"
"regexp"
"strings"
"time"
)
@ -26,6 +27,7 @@ func main() {
extractDir := flag.String("extractdir", "", "Directory to extract to (default: current directory)")
verifyOnly := flag.Bool("verify", false, "Only verify hash integrity without extraction")
verboseMode := flag.Bool("verbose", false, "Enable verbose output")
excludePatterns := flag.String("exclude", "", "Comma-separated list of patterns to exclude (e.g. \"*.log,*.tmp,temp/\")")
flag.Parse()
if *extractMode {
@ -58,7 +60,29 @@ func main() {
}
err := createTarball(*sourceDir, *outputFile, *prefixDir, *verboseMode)
// Process exclude patterns
var excludeRegexps []*regexp.Regexp
if *excludePatterns != "" {
patterns := strings.Split(*excludePatterns, ",")
for _, pattern := range patterns {
// Trim spaces
pattern = strings.TrimSpace(pattern)
if pattern == "" {
continue
}
// Convert glob pattern to regexp
regexPattern := globToRegexp(pattern)
re, err := regexp.Compile(regexPattern)
if err != nil {
fmt.Printf("Invalid exclude pattern %q: %v\n", pattern, err)
os.Exit(1)
}
excludeRegexps = append(excludeRegexps, re)
}
}
err := createTarball(*sourceDir, *outputFile, *prefixDir, excludeRegexps, *verboseMode)
if err != nil {
fmt.Printf("Error creating tarball: %v\n", err)
os.Exit(1)
@ -68,6 +92,29 @@ func main() {
}
}
// globToRegexp converts a glob-style exclude pattern into the source text of
// a regular expression intended to be matched against a slash-separated
// relative path.
//
// Translation rules:
//   - every regexp metacharacter in the glob is escaped and treated literally;
//   - "*" becomes ".*" (any sequence of characters, "/" included);
//   - "?" becomes "." (any single character);
//   - a pattern ending in "/" is a directory pattern and matches every path
//     that has that directory as a component prefix (e.g. "bin/" matches
//     "bin/tool" and "src/bin/tool");
//   - any other pattern must match through the end of the path
//     (e.g. "*.log" becomes "(^|/).*\.log$").
//
// The result is anchored to the start of a path segment, so "bin/" does not
// match "cabin/tool" and "main.go" does not match "src/domain.go". Patterns
// that already begin with "/" are left unanchored at the front because the
// leading separator is itself the segment boundary.
//
// The returned string is not compiled here; callers pass it to
// regexp.Compile.
func globToRegexp(pattern string) string {
	// Escape first so that characters such as "." or "+" in the glob are
	// matched literally; QuoteMeta also turns "*" and "?" into "\*" and "\?".
	expr := regexp.QuoteMeta(pattern)

	// Re-introduce the two glob wildcards as their regexp equivalents.
	expr = strings.ReplaceAll(expr, `\*`, ".*")
	expr = strings.ReplaceAll(expr, `\?`, ".")

	// Require the match to begin at the start of the path or right after a
	// separator, so a pattern cannot match the tail of a longer name.
	if !strings.HasPrefix(expr, "/") {
		expr = "(^|/)" + expr
	}

	if strings.HasSuffix(expr, "/") {
		// Directory pattern: anything below the directory matches.
		return expr + ".*"
	}
	// File pattern: must match through the end of the path.
	return expr + "$"
}
// calcFileMD5 calculates the MD5 hash of a file
func calcFileMD5(filePath string) (string, error) {
file, err := os.Open(filePath)
@ -84,7 +131,7 @@ func calcFileMD5(filePath string) (string, error) {
return hex.EncodeToString(hash.Sum(nil)), nil
}
func createTarball(sourceDir, outputFile, prefix string, verboseMode bool) error {
func createTarball(sourceDir, outputFile, prefix string, excludePatterns []*regexp.Regexp, verboseMode bool) error {
// Resolve absolute path of source directory
absSourceDir, err := filepath.Abs(sourceDir)
if err != nil {
@ -97,6 +144,12 @@ func createTarball(sourceDir, outputFile, prefix string, verboseMode bool) error
if prefix != "" {
fmt.Printf("Using prefix: %s\n", prefix)
}
if len(excludePatterns) > 0 {
fmt.Println("Using exclude patterns:")
for i, pattern := range excludePatterns {
fmt.Printf(" %d: %s\n", i+1, pattern)
}
}
}
// Create the output file
@ -120,6 +173,7 @@ func createTarball(sourceDir, outputFile, prefix string, verboseMode bool) error
// Create a map to store file hashes
hashes := make(map[string]string)
fileCount := 0
skippedCount := 0
// Walk through the source directory
err = filepath.Walk(absSourceDir, func(path string, info os.FileInfo, err error) error {
@ -143,6 +197,23 @@ func createTarball(sourceDir, outputFile, prefix string, verboseMode bool) error
return nil
}
// Check if file matches any exclude patterns
if len(excludePatterns) > 0 {
relPathForward := filepath.ToSlash(relPath)
for _, pattern := range excludePatterns {
if pattern.MatchString(relPathForward) {
if verboseMode {
fmt.Printf("Excluding: %s (matched pattern)\n", relPathForward)
}
skippedCount++
if info.IsDir() {
return filepath.SkipDir
}
return nil
}
}
}
// Add prefix if specified
if prefix != "" {
relPath = filepath.Join(prefix, relPath)
@ -233,7 +304,10 @@ func createTarball(sourceDir, outputFile, prefix string, verboseMode bool) error
}
if verboseMode {
fmt.Printf("Added %d files to tarball\n", fileCount)
fmt.Printf("Added %d files to tarball\n", fileCount-skippedCount)
if skippedCount > 0 {
fmt.Printf("Excluded %d files/directories\n", skippedCount)
}
fmt.Println("Creating MD5 manifest...")
}

View File

@ -57,10 +57,47 @@ If you need to manually test the tarballer utility, you can:
# Create a tarball
./bin/tarballer-darwin -source /path/to/source -output output.tar.gz -prefix myapp
# Testing exclude patterns
./bin/tarballer-darwin -source /path/to/source -output output.tar.gz -prefix myapp -exclude "*.log,temp/"
# Test with verbose output
./bin/tarballer-darwin -source /path/to/source -output output.tar.gz -prefix myapp -verbose
# Extract and verify a tarball
./bin/tarballer-darwin -extract -output output.tar.gz -extractdir /path/to/extract
```
## Testing Exclude Patterns
To test the exclude patterns feature:
1. Create a directory with various file types:
```bash
mkdir -p test-dir/logs test-dir/bin test-dir/src
touch test-dir/file1.txt test-dir/file2.txt
touch test-dir/logs/app.log test-dir/logs/error.log
touch test-dir/bin/executable
touch test-dir/src/main.go test-dir/src/util.go
```
2. Test with various exclude patterns:
```bash
# Exclude all .log files
./bin/tarballer-darwin -source test-dir -output test1.tar.gz -prefix test -exclude "*.log" -verbose
# Exclude entire directories
./bin/tarballer-darwin -source test-dir -output test2.tar.gz -prefix test -exclude "logs/,bin/" -verbose
# Exclude multiple patterns
./bin/tarballer-darwin -source test-dir -output test3.tar.gz -prefix test -exclude "*.log,*.go,bin/" -verbose
```
3. Extract and verify that exclusions worked:
```bash
./bin/tarballer-darwin -extract -output test1.tar.gz -extractdir test1-extracted -verbose
find test1-extracted -type f | grep '\.log$' # Should return nothing
```
## Modifying Tests
When modifying tests, keep in mind that the test script uses the container's `/tmp` directory for all temporary files. This keeps the test process self-contained within the container.

View File

@ -27,6 +27,8 @@ cleanup_files() {
rm -rf /tmp/standard-test /tmp/standard-extracted /tmp/reference-extracted
rm -f /tmp/original-checksums.txt /tmp/standard-checksums.txt /tmp/reference-checksums.txt
rm -f /workdir/complex.tar.gz /workdir/standard.tar.gz /workdir/reference.tar.gz
rm -rf /tmp/exclude-test /tmp/exclude-extracted
rm -f /workdir/exclude.tar.gz
echo "Temporary files cleaned up"
else
echo "Keeping temporary files for inspection"
@ -160,6 +162,113 @@ run_basic_test() {
return 0
}
# _exclude_pack_and_unpack ARCHIVE PATTERNS DEST
# Builds ARCHIVE from /tmp/exclude-test with the given -exclude PATTERNS,
# extracts it into DEST with the system tar, and confirms that main.txt
# (which no pattern in this test excludes) survived. Returns 1 on any failure
# so the caller's "nothing excluded was found" checks cannot pass vacuously
# against an empty or missing extraction.
_exclude_pack_and_unpack() {
    if ! /bin/tarballer -source /tmp/exclude-test -output "$1" -prefix test -exclude "$2" -verbose; then
        echo "ERROR: tarballer failed to create $1"
        return 1
    fi

    mkdir -p "$3"
    if ! tar -xzf "$1" -C "$3"; then
        echo "ERROR: Could not extract $1"
        return 1
    fi

    if [ -z "$(find "$3" -type f -name main.txt)" ]; then
        echo "ERROR: main.txt is missing from $3; archive looks empty or over-excluded"
        return 1
    fi
    return 0
}

# run_exclude_patterns_test
# Exercises the -exclude flag of /bin/tarballer in three scenarios:
#   1. a file glob (*.log),
#   2. directory patterns (temp/ and bin/),
#   3. a mix of both (*.log, *.go, bin/).
# For each scenario the archive is extracted with the system tar and the
# extracted tree is searched for entries that should have been excluded.
# Returns 0 when all scenarios pass, 1 on the first failure.
run_exclude_patterns_test() {
    echo "=== RUNNING EXCLUDE PATTERNS TEST ==="

    # Clean up test directories and every archive this test writes
    # (exclude.tar.gz is kept in the list for backward compatibility).
    rm -rf /tmp/exclude-test /tmp/exclude-extracted
    rm -f /workdir/exclude.tar.gz
    rm -f /workdir/exclude1.tar.gz /workdir/exclude2.tar.gz /workdir/exclude3.tar.gz

    # Create test directory structure
    mkdir -p /tmp/exclude-test/logs /tmp/exclude-test/temp
    mkdir -p /tmp/exclude-test/src/lib /tmp/exclude-test/bin
    mkdir -p /tmp/exclude-test/data

    # Create various file types
    echo "Main text file" > /tmp/exclude-test/main.txt
    echo "Config file" > /tmp/exclude-test/config.ini

    # Log files (to be excluded with pattern *.log)
    echo "Log file 1" > /tmp/exclude-test/logs/app.log
    echo "Log file 2" > /tmp/exclude-test/logs/error.log

    # Temporary files (to be excluded with pattern temp/)
    echo "Temp file 1" > /tmp/exclude-test/temp/cache.tmp
    echo "Temp file 2" > /tmp/exclude-test/temp/session.tmp

    # Source files (some to be excluded with pattern *.go)
    echo "Source file Go" > /tmp/exclude-test/src/main.go
    echo "Source file C" > /tmp/exclude-test/src/helper.c
    echo "Source file Go lib" > /tmp/exclude-test/src/lib/utils.go
    echo "Source file C lib" > /tmp/exclude-test/src/lib/core.c

    # Binary files (to be excluded with pattern bin/)
    generate_random_file "/tmp/exclude-test/bin/app" 1024
    generate_random_file "/tmp/exclude-test/bin/tool" 512

    # Data files (not to be excluded)
    generate_random_file "/tmp/exclude-test/data/data1.bin" 256
    generate_random_file "/tmp/exclude-test/data/data2.bin" 128

    # List original structure
    echo "=== ORIGINAL STRUCTURE ==="
    find /tmp/exclude-test -type f | sort
    echo "Total files: $(find /tmp/exclude-test -type f | wc -l)"

    # Test excluding *.log files
    echo "=== TEST 1: EXCLUDING *.log FILES ==="
    _exclude_pack_and_unpack /workdir/exclude1.tar.gz "*.log" /tmp/exclude-extracted/test1 || return 1

    echo "=== EXTRACTED STRUCTURE (WITHOUT LOGS) ==="
    find /tmp/exclude-extracted/test1 -type f | sort
    echo "Total files: $(find /tmp/exclude-extracted/test1 -type f | wc -l)"

    # Check that no .log files exist in the extracted archive
    LOG_FILES=$(find /tmp/exclude-extracted/test1 -name "*.log" | wc -l)
    if [ "$LOG_FILES" -eq 0 ]; then
        echo "SUCCESS: No .log files found in the extracted archive"
    else
        echo "ERROR: Found .log files in the extracted archive"
        return 1
    fi

    # Test excluding directories
    echo "=== TEST 2: EXCLUDING DIRECTORIES (temp/ and bin/) ==="
    _exclude_pack_and_unpack /workdir/exclude2.tar.gz "temp/,bin/" /tmp/exclude-extracted/test2 || return 1

    echo "=== EXTRACTED STRUCTURE (WITHOUT temp/ AND bin/) ==="
    find /tmp/exclude-extracted/test2 -type f | sort
    echo "Total files: $(find /tmp/exclude-extracted/test2 -type f | wc -l)"

    # Check that the excluded directories don't exist in the extracted archive
    EXCLUDED_DIRS=$(find /tmp/exclude-extracted/test2 -path "*/temp" -o -path "*/bin" | wc -l)
    if [ "$EXCLUDED_DIRS" -eq 0 ]; then
        echo "SUCCESS: No temp/ or bin/ directories found in the extracted archive"
    else
        echo "ERROR: Found excluded directories in the extracted archive"
        return 1
    fi

    # Test excluding multiple patterns
    echo "=== TEST 3: EXCLUDING MULTIPLE PATTERNS (*.log, *.go, bin/) ==="
    _exclude_pack_and_unpack /workdir/exclude3.tar.gz "*.log,*.go,bin/" /tmp/exclude-extracted/test3 || return 1

    echo "=== EXTRACTED STRUCTURE (WITH MULTIPLE EXCLUSIONS) ==="
    find /tmp/exclude-extracted/test3 -type f | sort
    echo "Total files: $(find /tmp/exclude-extracted/test3 -type f | wc -l)"

    # Check all exclusions
    EXCLUDED_FILES=$(find /tmp/exclude-extracted/test3 -name "*.log" -o -name "*.go" -o -path "*/bin/*" | wc -l)
    if [ "$EXCLUDED_FILES" -eq 0 ]; then
        echo "SUCCESS: All excluded patterns are working correctly"
    else
        echo "ERROR: Found files that should have been excluded"
        return 1
    fi

    echo "Exclude patterns test completed successfully!"
    return 0
}
run_tar_comparison_test() {
echo "=== RUNNING TAR COMPARISON TEST ==="
@ -248,121 +357,128 @@ EOF
# For each original file, find its corresponding extracted file and compare hashes
while read -r line; do
ORIG_HASH=$(echo "$line" | awk '{print $1}')
ORIG_FILE=$(echo "$line" | awk '{print $2}')
ORIG_FILE=$(echo "$line" | awk '{$1=""; print $0}' | sed 's/^ //')
FILENAME=$(basename "$ORIG_FILE")
# Find the corresponding file in the extracted directory
EXTRACTED_FILE=$(find /tmp/standard-extracted -name "$FILENAME" | grep -v ".md5-manifest.txt" | head -1)
# Look for the same file in the tarballer output
EXTRACTED_FILE=$(find /tmp/standard-extracted -name "$FILENAME" | head -1)
if [ -n "$EXTRACTED_FILE" ]; then
# Get the hash of the extracted file
EXTRACTED_HASH=$(grep "$EXTRACTED_FILE" /tmp/standard-checksums.txt | awk '{print $1}')
if [ "$ORIG_HASH" = "$EXTRACTED_HASH" ]; then
MATCH_COUNT=$((MATCH_COUNT + 1))
else
echo "Hash mismatch for $FILENAME: original=$ORIG_HASH extracted=$EXTRACTED_HASH"
echo "HASH MISMATCH: $FILENAME"
echo "Original: $ORIG_HASH"
echo "Extracted: $EXTRACTED_HASH"
fi
else
echo "File not found in extraction: $FILENAME"
fi
done < /tmp/original-checksums.txt
if [ "$MATCH_COUNT" -eq "$EXPECTED_COUNT" ]; then
echo 'SUCCESS: Tarballer extraction hashes match original files!'
echo "SUCCESS: Tarballer extraction hashes match original files!"
else
echo "ERROR: Hash mismatch detected! Matched $MATCH_COUNT of $EXPECTED_COUNT files."
echo "ERROR: Only $MATCH_COUNT of $EXPECTED_COUNT hashes match"
return 1
fi
# Similar check for reference tar extraction
# Compare original files to standard tar extraction
MATCH_COUNT=0
# For each original file, find its corresponding extracted file and compare hashes
while read -r line; do
ORIG_HASH=$(echo "$line" | awk '{print $1}')
ORIG_FILE=$(echo "$line" | awk '{print $2}')
ORIG_FILE=$(echo "$line" | awk '{$1=""; print $0}' | sed 's/^ //')
FILENAME=$(basename "$ORIG_FILE")
# Find the corresponding file in the extracted directory
# Look for the same file in the reference output
EXTRACTED_FILE=$(find /tmp/reference-extracted -name "$FILENAME" | head -1)
if [ -n "$EXTRACTED_FILE" ]; then
# Get the hash of the extracted file
EXTRACTED_HASH=$(grep "$EXTRACTED_FILE" /tmp/reference-checksums.txt | awk '{print $1}')
if [ "$ORIG_HASH" = "$EXTRACTED_HASH" ]; then
MATCH_COUNT=$((MATCH_COUNT + 1))
else
echo "Hash mismatch for $FILENAME: original=$ORIG_HASH reference=$EXTRACTED_HASH"
echo "HASH MISMATCH (ref): $FILENAME"
echo "Original: $ORIG_HASH"
echo "Extracted: $EXTRACTED_HASH"
fi
else
echo "File not found in reference extraction: $FILENAME"
fi
done < /tmp/original-checksums.txt
if [ "$MATCH_COUNT" -eq "$EXPECTED_COUNT" ]; then
echo 'SUCCESS: Reference tar extraction hashes match original files!'
echo "SUCCESS: Reference tar extraction hashes match original files!"
else
echo "ERROR: Hash mismatch detected! Matched $MATCH_COUNT of $EXPECTED_COUNT files."
echo "ERROR: Only $MATCH_COUNT of $EXPECTED_COUNT hashes match"
return 1
fi
echo '=== VERIFYING SYMLINKS ==='
echo 'ORIGINAL SYMLINKS:'
# Test symlinks
echo "=== VERIFYING SYMLINKS ==="
echo "ORIGINAL SYMLINKS:"
find /tmp/standard-test -type l -exec ls -la {} \;
echo 'EXTRACTED SYMLINKS:'
echo "EXTRACTED SYMLINKS:"
find /tmp/standard-extracted -type l -exec ls -la {} \;
# Compare file counts to ensure all files were extracted
echo '=== FILE COUNT COMPARISON ==='
echo -n 'Original files: ' && find /tmp/standard-test -type f | wc -l
echo -n 'Extracted files: ' && find /tmp/standard-extracted -type f | wc -l
# Verify file counts
echo "=== FILE COUNT COMPARISON ==="
echo "Original files: $(find /tmp/standard-test -type f | wc -l)"
echo "Extracted files: $(find /tmp/standard-extracted -type f | wc -l)"
# Test symlink functionality
echo '=== TESTING SYMLINK CONTENT ==='
echo 'Original linked content:'
# Test symlink content
echo "=== TESTING SYMLINK CONTENT ==="
echo "Original linked content:"
cat /tmp/standard-test/data/config-link.json
echo 'Extracted linked content:'
echo "Extracted linked content:"
cat /tmp/standard-extracted/app/data/config-link.json
echo 'Tar comparison test completed successfully!'
echo "Tar comparison test completed successfully!"
return 0
}
# Main script execution
# Main script logic
echo "=== RUNNING ALL TESTS ==="
case "$TEST_TYPE" in
"basic")
run_basic_test
RESULT=$?
[ "$RESULT" -eq 0 ] && cleanup_files
exit $RESULT
;;
"tar")
run_tar_comparison_test
RESULT=$?
[ "$RESULT" -eq 0 ] && cleanup_files
exit $RESULT
;;
"all")
echo "=== RUNNING ALL TESTS ==="
run_basic_test
BASIC_RESULT=$?
run_tar_comparison_test
TAR_RESULT=$?
if [ $BASIC_RESULT -eq 0 ] && [ $TAR_RESULT -eq 0 ]; then
echo "✅ ALL TESTS PASSED SUCCESSFULLY!"
cleanup_files
exit 0
else
echo "❌ SOME TESTS FAILED!"
exit 1
fi
"exclude")
run_exclude_patterns_test
RESULT=$?
;;
"clean")
cleanup_files
exit 0
RESULT=0
;;
*)
echo "Unknown test type: $TEST_TYPE"
echo "Usage: $0 [basic|tar|all|clean] [keep_temp_files]"
echo " keep_temp_files: 0 (clean up, default) or 1 (keep temp files)"
exit 1
"all"|*)
run_basic_test
RESULT1=$?
run_tar_comparison_test
RESULT2=$?
run_exclude_patterns_test
RESULT3=$?
RESULT=$((RESULT1 + RESULT2 + RESULT3))
;;
esac
if [ "$TEST_TYPE" != "clean" ]; then
cleanup_files
fi
if [ "$RESULT" -eq 0 ]; then
echo "✅ ALL TESTS PASSED SUCCESSFULLY!"
exit 0
else
echo "❌ TESTS FAILED WITH ERRORS!"
exit 1
fi