Add exclude patterns functionality with tests and documentation

This commit is contained in:
Leopere 2025-03-21 14:56:47 -04:00
parent b546e84afd
commit fdc692c90c
7 changed files with 319 additions and 68 deletions

View File

@ -11,6 +11,7 @@ A utility to create tarballs with a specific directory structure and built-in MD
- Compatible with standard tar tools
- Built-in MD5 hash verification
- Automatic file integrity checks during extraction
- Pattern-based file exclusion for creating targeted archives
## Building
@ -56,6 +57,8 @@ Options:
- `-source`: Source directory to compress (required)
- `-output`: Output tarball filename (default: "output.tar.gz")
- `-prefix`: Directory name that will contain all files in the tarball (default: "myapp")
- `-exclude`: Comma-separated list of patterns to exclude (e.g. "*.log,*.tmp,temp/")
- `-verbose`: Enable detailed output during operation
### Extract Mode
```bash
@ -67,6 +70,7 @@ Options:
- `-output`: Tarball to extract (required)
- `-extractdir`: Extraction directory (default: current directory)
- `-verify`: Only verify hash integrity without extraction
- `-verbose`: Enable detailed output during operation
### Examples
@ -74,9 +78,15 @@ Options:
# Create a tarball (macOS)
./bin/tarballer-darwin -source ./myproject -output release.tar.gz -prefix app
# Create a tarball excluding specific files
./bin/tarballer-darwin -source ./myproject -output release.tar.gz -prefix app -exclude "*.log,bin/,temp/"
# Extract and verify (Linux)
./bin/tarballer-linux -extract -output release.tar.gz -extractdir /path/to/extract
# Extract with verbose output
./bin/tarballer-linux -extract -output release.tar.gz -extractdir /path/to/extract -verbose
# Only verify integrity
./bin/tarballer-linux -extract -verify -output release.tar.gz
```
@ -86,4 +96,18 @@ Options:
1. During creation, MD5 hashes are calculated for all files and stored in `.md5-manifest.txt`
2. During extraction, file hashes are verified against the manifest
3. The manifest file is removed after successful extraction
4. Extraction aborts with an error if verification fails
4. Extraction aborts with an error if verification fails
## Exclude Patterns
The `-exclude` flag accepts a comma-separated list of patterns to exclude from the tarball:
- Simple wildcards using `*` (matches any sequence of characters, including `/`) and `?` (matches any single character)
- Directory patterns (ending with `/`) exclude entire directory trees
- File patterns can match by extension (e.g., `*.log`) or name
Examples:
- `*.log` - Excludes all files with the .log extension
- `bin/` - Excludes the bin directory and all its contents
- `temp/,*.tmp` - Excludes the temp directory and all .tmp files
- `cache/*,*.bak` - Excludes all contents of the cache directory and all .bak files

Binary file not shown.

Binary file not shown.

Binary file not shown.

80
main.go
View File

@ -11,6 +11,7 @@ import (
"io"
"os"
"path/filepath"
"regexp"
"strings"
"time"
)
@ -26,6 +27,7 @@ func main() {
extractDir := flag.String("extractdir", "", "Directory to extract to (default: current directory)")
verifyOnly := flag.Bool("verify", false, "Only verify hash integrity without extraction")
verboseMode := flag.Bool("verbose", false, "Enable verbose output")
excludePatterns := flag.String("exclude", "", "Comma-separated list of patterns to exclude (e.g. \"*.log,*.tmp,temp/\")")
flag.Parse()
if *extractMode {
@ -58,7 +60,29 @@ func main() {
}
err := createTarball(*sourceDir, *outputFile, *prefixDir, *verboseMode)
// Process exclude patterns
var excludeRegexps []*regexp.Regexp
if *excludePatterns != "" {
patterns := strings.Split(*excludePatterns, ",")
for _, pattern := range patterns {
// Trim spaces
pattern = strings.TrimSpace(pattern)
if pattern == "" {
continue
}
// Convert glob pattern to regexp
regexPattern := globToRegexp(pattern)
re, err := regexp.Compile(regexPattern)
if err != nil {
fmt.Printf("Invalid exclude pattern %q: %v\n", pattern, err)
os.Exit(1)
}
excludeRegexps = append(excludeRegexps, re)
}
}
err := createTarball(*sourceDir, *outputFile, *prefixDir, excludeRegexps, *verboseMode)
if err != nil {
fmt.Printf("Error creating tarball: %v\n", err)
os.Exit(1)
@ -68,6 +92,29 @@ func main() {
}
}
// globToRegexp converts a glob pattern into the source text of a regular
// expression intended to be matched against slash-separated relative paths.
//
// Supported syntax:
//   - "*" matches any sequence of characters, including "/".
//   - "?" matches any single character.
//   - A trailing "/" marks a directory pattern, which matches the directory
//     itself as well as everything beneath it.
//
// Every other character is matched literally. The expression is anchored on
// the left to a path-segment boundary (start of the path or just after a "/")
// and on the right to the end of the path, so "bin/" matches "bin", "bin/app"
// and "src/bin/app" but not "sbin/app", and "main.go" matches "src/main.go"
// but not "src/xmain.go".
//
// The returned string is meant to be passed to regexp.Compile.
func globToRegexp(pattern string) string {
	// A trailing slash selects directory semantics. Strip it so that the
	// directory entry itself, which carries no trailing slash, can match too;
	// otherwise the walker never gets the chance to skip the whole subtree.
	isDir := strings.HasSuffix(pattern, "/")
	if isDir {
		pattern = strings.TrimSuffix(pattern, "/")
	}

	// Escape every regexp metacharacter, then re-enable the two glob wildcards.
	expr := regexp.QuoteMeta(pattern)
	expr = strings.ReplaceAll(expr, `\*`, ".*")
	expr = strings.ReplaceAll(expr, `\?`, ".")

	// Only start matching at the beginning of a path segment.
	expr = "(^|/)" + expr

	if isDir {
		// The directory itself, or anything below it.
		return expr + "(/.*)?$"
	}
	// File patterns must match through to the end of the path.
	return expr + "$"
}
// calcFileMD5 calculates the MD5 hash of a file
func calcFileMD5(filePath string) (string, error) {
file, err := os.Open(filePath)
@ -84,7 +131,7 @@ func calcFileMD5(filePath string) (string, error) {
return hex.EncodeToString(hash.Sum(nil)), nil
}
func createTarball(sourceDir, outputFile, prefix string, verboseMode bool) error {
func createTarball(sourceDir, outputFile, prefix string, excludePatterns []*regexp.Regexp, verboseMode bool) error {
// Resolve absolute path of source directory
absSourceDir, err := filepath.Abs(sourceDir)
if err != nil {
@ -97,6 +144,12 @@ func createTarball(sourceDir, outputFile, prefix string, verboseMode bool) error
if prefix != "" {
fmt.Printf("Using prefix: %s\n", prefix)
}
if len(excludePatterns) > 0 {
fmt.Println("Using exclude patterns:")
for i, pattern := range excludePatterns {
fmt.Printf(" %d: %s\n", i+1, pattern)
}
}
}
// Create the output file
@ -120,6 +173,7 @@ func createTarball(sourceDir, outputFile, prefix string, verboseMode bool) error
// Create a map to store file hashes
hashes := make(map[string]string)
fileCount := 0
skippedCount := 0
// Walk through the source directory
err = filepath.Walk(absSourceDir, func(path string, info os.FileInfo, err error) error {
@ -143,6 +197,23 @@ func createTarball(sourceDir, outputFile, prefix string, verboseMode bool) error
return nil
}
// Check if file matches any exclude patterns
if len(excludePatterns) > 0 {
relPathForward := filepath.ToSlash(relPath)
for _, pattern := range excludePatterns {
if pattern.MatchString(relPathForward) {
if verboseMode {
fmt.Printf("Excluding: %s (matched pattern)\n", relPathForward)
}
skippedCount++
if info.IsDir() {
return filepath.SkipDir
}
return nil
}
}
}
// Add prefix if specified
if prefix != "" {
relPath = filepath.Join(prefix, relPath)
@ -233,7 +304,10 @@ func createTarball(sourceDir, outputFile, prefix string, verboseMode bool) error
}
if verboseMode {
fmt.Printf("Added %d files to tarball\n", fileCount)
fmt.Printf("Added %d files to tarball\n", fileCount-skippedCount)
if skippedCount > 0 {
fmt.Printf("Excluded %d files/directories\n", skippedCount)
}
fmt.Println("Creating MD5 manifest...")
}

View File

@ -57,10 +57,47 @@ If you need to manually test the tarballer utility, you can:
# Create a tarball
./bin/tarballer-darwin -source /path/to/source -output output.tar.gz -prefix myapp
# Testing exclude patterns
./bin/tarballer-darwin -source /path/to/source -output output.tar.gz -prefix myapp -exclude "*.log,temp/"
# Test with verbose output
./bin/tarballer-darwin -source /path/to/source -output output.tar.gz -prefix myapp -verbose
# Extract and verify a tarball
./bin/tarballer-darwin -extract -output output.tar.gz -extractdir /path/to/extract
```
## Testing Exclude Patterns
To test the exclude patterns feature:
1. Create a directory with various file types:
```bash
mkdir -p test-dir/logs test-dir/bin test-dir/src
touch test-dir/file1.txt test-dir/file2.txt
touch test-dir/logs/app.log test-dir/logs/error.log
touch test-dir/bin/executable
touch test-dir/src/main.go test-dir/src/util.go
```
2. Test with various exclude patterns:
```bash
# Exclude all .log files
./bin/tarballer-darwin -source test-dir -output test1.tar.gz -prefix test -exclude "*.log" -verbose
# Exclude entire directories
./bin/tarballer-darwin -source test-dir -output test2.tar.gz -prefix test -exclude "logs/,bin/" -verbose
# Exclude multiple patterns
./bin/tarballer-darwin -source test-dir -output test3.tar.gz -prefix test -exclude "*.log,*.go,bin/" -verbose
```
3. Extract and verify that exclusions worked:
```bash
./bin/tarballer-darwin -extract -output test1.tar.gz -extractdir test1-extracted -verbose
find test1-extracted -type f -name "*.log" # Should return nothing
```
## Modifying Tests
When modifying tests, keep in mind that the test script uses the container's `/tmp` directory for all temporary files. This keeps the test process self-contained within the container.

View File

@ -27,6 +27,8 @@ cleanup_files() {
rm -rf /tmp/standard-test /tmp/standard-extracted /tmp/reference-extracted
rm -f /tmp/original-checksums.txt /tmp/standard-checksums.txt /tmp/reference-checksums.txt
rm -f /workdir/complex.tar.gz /workdir/standard.tar.gz /workdir/reference.tar.gz
rm -rf /tmp/exclude-test /tmp/exclude-extracted
rm -f /workdir/exclude.tar.gz
echo "Temporary files cleaned up"
else
echo "Keeping temporary files for inspection"
@ -160,6 +162,113 @@ run_basic_test() {
return 0
}
# run_exclude_patterns_test exercises the tarballer -exclude flag.
#
# It builds a fixture tree under /tmp/exclude-test, then creates and extracts
# three archives:
#   1. excluding "*.log"
#   2. excluding the directories "temp/" and "bin/"
#   3. excluding "*.log", "*.go" and "bin/" together
# For each archive it checks that nothing excluded was extracted and that at
# least one file which should have been kept is present, so an empty archive
# cannot pass by accident.
#
# Returns 0 when every scenario passes, 1 on the first failure (including a
# failing tarballer or tar invocation).
run_exclude_patterns_test() {
    echo "=== RUNNING EXCLUDE PATTERNS TEST ==="

    # Clean up test directories and every archive this test writes
    rm -rf /tmp/exclude-test /tmp/exclude-extracted
    rm -f /workdir/exclude.tar.gz /workdir/exclude1.tar.gz /workdir/exclude2.tar.gz /workdir/exclude3.tar.gz

    # Create test directory structure
    mkdir -p /tmp/exclude-test/logs /tmp/exclude-test/temp
    mkdir -p /tmp/exclude-test/src/lib /tmp/exclude-test/bin
    mkdir -p /tmp/exclude-test/data

    # Create various file types
    echo "Main text file" > /tmp/exclude-test/main.txt
    echo "Config file" > /tmp/exclude-test/config.ini

    # Log files (to be excluded with pattern *.log)
    echo "Log file 1" > /tmp/exclude-test/logs/app.log
    echo "Log file 2" > /tmp/exclude-test/logs/error.log

    # Temporary files (to be excluded with pattern temp/)
    echo "Temp file 1" > /tmp/exclude-test/temp/cache.tmp
    echo "Temp file 2" > /tmp/exclude-test/temp/session.tmp

    # Source files (some to be excluded with pattern *.go)
    echo "Source file Go" > /tmp/exclude-test/src/main.go
    echo "Source file C" > /tmp/exclude-test/src/helper.c
    echo "Source file Go lib" > /tmp/exclude-test/src/lib/utils.go
    echo "Source file C lib" > /tmp/exclude-test/src/lib/core.c

    # Binary files (to be excluded with pattern bin/)
    generate_random_file "/tmp/exclude-test/bin/app" 1024
    generate_random_file "/tmp/exclude-test/bin/tool" 512

    # Data files (not to be excluded)
    generate_random_file "/tmp/exclude-test/data/data1.bin" 256
    generate_random_file "/tmp/exclude-test/data/data2.bin" 128

    # List original structure
    echo "=== ORIGINAL STRUCTURE ==="
    find /tmp/exclude-test -type f | sort
    echo "Total files: $(find /tmp/exclude-test -type f | wc -l)"

    # Test excluding *.log files
    echo "=== TEST 1: EXCLUDING *.log FILES ==="
    if ! /bin/tarballer -source /tmp/exclude-test -output /workdir/exclude1.tar.gz -prefix test -exclude "*.log" -verbose; then
        echo "ERROR: tarballer failed while creating exclude1.tar.gz"
        return 1
    fi

    mkdir -p /tmp/exclude-extracted/test1
    if ! tar -xzf /workdir/exclude1.tar.gz -C /tmp/exclude-extracted/test1; then
        echo "ERROR: could not extract exclude1.tar.gz"
        return 1
    fi

    echo "=== EXTRACTED STRUCTURE (WITHOUT LOGS) ==="
    find /tmp/exclude-extracted/test1 -type f | sort
    echo "Total files: $(find /tmp/exclude-extracted/test1 -type f | wc -l)"

    # Guard against a vacuous pass: a file that is not excluded must be present
    KEPT_FILES=$(find /tmp/exclude-extracted/test1 -type f -name "main.txt" | wc -l)
    if [ "$KEPT_FILES" -eq 0 ]; then
        echo "ERROR: main.txt is missing from the extracted archive"
        return 1
    fi

    # Check that no .log files exist in the extracted archive
    LOG_FILES=$(find /tmp/exclude-extracted/test1 -name "*.log" | wc -l)
    if [ "$LOG_FILES" -eq 0 ]; then
        echo "SUCCESS: No .log files found in the extracted archive"
    else
        echo "ERROR: Found .log files in the extracted archive"
        return 1
    fi

    # Test excluding directories
    echo "=== TEST 2: EXCLUDING DIRECTORIES (temp/ and bin/) ==="
    if ! /bin/tarballer -source /tmp/exclude-test -output /workdir/exclude2.tar.gz -prefix test -exclude "temp/,bin/" -verbose; then
        echo "ERROR: tarballer failed while creating exclude2.tar.gz"
        return 1
    fi

    mkdir -p /tmp/exclude-extracted/test2
    if ! tar -xzf /workdir/exclude2.tar.gz -C /tmp/exclude-extracted/test2; then
        echo "ERROR: could not extract exclude2.tar.gz"
        return 1
    fi

    echo "=== EXTRACTED STRUCTURE (WITHOUT temp/ AND bin/) ==="
    find /tmp/exclude-extracted/test2 -type f | sort
    echo "Total files: $(find /tmp/exclude-extracted/test2 -type f | wc -l)"

    # Guard against a vacuous pass: a file that is not excluded must be present
    KEPT_FILES=$(find /tmp/exclude-extracted/test2 -type f -name "main.txt" | wc -l)
    if [ "$KEPT_FILES" -eq 0 ]; then
        echo "ERROR: main.txt is missing from the extracted archive"
        return 1
    fi

    # Check that the excluded directories don't exist in the extracted archive
    EXCLUDED_DIRS=$(find /tmp/exclude-extracted/test2 -path "*/temp" -o -path "*/bin" | wc -l)
    if [ "$EXCLUDED_DIRS" -eq 0 ]; then
        echo "SUCCESS: No temp/ or bin/ directories found in the extracted archive"
    else
        echo "ERROR: Found excluded directories in the extracted archive"
        return 1
    fi

    # Test excluding multiple patterns
    echo "=== TEST 3: EXCLUDING MULTIPLE PATTERNS (*.log, *.go, bin/) ==="
    if ! /bin/tarballer -source /tmp/exclude-test -output /workdir/exclude3.tar.gz -prefix test -exclude "*.log,*.go,bin/" -verbose; then
        echo "ERROR: tarballer failed while creating exclude3.tar.gz"
        return 1
    fi

    mkdir -p /tmp/exclude-extracted/test3
    if ! tar -xzf /workdir/exclude3.tar.gz -C /tmp/exclude-extracted/test3; then
        echo "ERROR: could not extract exclude3.tar.gz"
        return 1
    fi

    echo "=== EXTRACTED STRUCTURE (WITH MULTIPLE EXCLUSIONS) ==="
    find /tmp/exclude-extracted/test3 -type f | sort
    echo "Total files: $(find /tmp/exclude-extracted/test3 -type f | wc -l)"

    # Guard against a vacuous pass: helper.c sits next to excluded *.go files
    # and must survive the exclusion
    KEPT_FILES=$(find /tmp/exclude-extracted/test3 -type f -name "helper.c" | wc -l)
    if [ "$KEPT_FILES" -eq 0 ]; then
        echo "ERROR: helper.c is missing from the extracted archive"
        return 1
    fi

    # Check all exclusions
    EXCLUDED_FILES=$(find /tmp/exclude-extracted/test3 -name "*.log" -o -name "*.go" -o -path "*/bin/*" | wc -l)
    if [ "$EXCLUDED_FILES" -eq 0 ]; then
        echo "SUCCESS: All excluded patterns are working correctly"
    else
        echo "ERROR: Found files that should have been excluded"
        return 1
    fi

    echo "Exclude patterns test completed successfully!"
    return 0
}
run_tar_comparison_test() {
echo "=== RUNNING TAR COMPARISON TEST ==="
@ -248,121 +357,128 @@ EOF
# For each original file, find its corresponding extracted file and compare hashes
while read -r line; do
ORIG_HASH=$(echo "$line" | awk '{print $1}')
ORIG_FILE=$(echo "$line" | awk '{print $2}')
ORIG_FILE=$(echo "$line" | awk '{$1=""; print $0}' | sed 's/^ //')
FILENAME=$(basename "$ORIG_FILE")
# Find the corresponding file in the extracted directory
EXTRACTED_FILE=$(find /tmp/standard-extracted -name "$FILENAME" | grep -v ".md5-manifest.txt" | head -1)
# Look for the same file in the tarballer output
EXTRACTED_FILE=$(find /tmp/standard-extracted -name "$FILENAME" | head -1)
if [ -n "$EXTRACTED_FILE" ]; then
# Get the hash of the extracted file
EXTRACTED_HASH=$(grep "$EXTRACTED_FILE" /tmp/standard-checksums.txt | awk '{print $1}')
if [ "$ORIG_HASH" = "$EXTRACTED_HASH" ]; then
MATCH_COUNT=$((MATCH_COUNT + 1))
else
echo "Hash mismatch for $FILENAME: original=$ORIG_HASH extracted=$EXTRACTED_HASH"
echo "HASH MISMATCH: $FILENAME"
echo "Original: $ORIG_HASH"
echo "Extracted: $EXTRACTED_HASH"
fi
else
echo "File not found in extraction: $FILENAME"
fi
done < /tmp/original-checksums.txt
if [ "$MATCH_COUNT" -eq "$EXPECTED_COUNT" ]; then
echo 'SUCCESS: Tarballer extraction hashes match original files!'
echo "SUCCESS: Tarballer extraction hashes match original files!"
else
echo "ERROR: Hash mismatch detected! Matched $MATCH_COUNT of $EXPECTED_COUNT files."
echo "ERROR: Only $MATCH_COUNT of $EXPECTED_COUNT hashes match"
return 1
fi
# Similar check for reference tar extraction
# Compare original files to standard tar extraction
MATCH_COUNT=0
# For each original file, find its corresponding extracted file and compare hashes
while read -r line; do
ORIG_HASH=$(echo "$line" | awk '{print $1}')
ORIG_FILE=$(echo "$line" | awk '{print $2}')
ORIG_FILE=$(echo "$line" | awk '{$1=""; print $0}' | sed 's/^ //')
FILENAME=$(basename "$ORIG_FILE")
# Find the corresponding file in the extracted directory
# Look for the same file in the reference output
EXTRACTED_FILE=$(find /tmp/reference-extracted -name "$FILENAME" | head -1)
if [ -n "$EXTRACTED_FILE" ]; then
# Get the hash of the extracted file
EXTRACTED_HASH=$(grep "$EXTRACTED_FILE" /tmp/reference-checksums.txt | awk '{print $1}')
if [ "$ORIG_HASH" = "$EXTRACTED_HASH" ]; then
MATCH_COUNT=$((MATCH_COUNT + 1))
else
echo "Hash mismatch for $FILENAME: original=$ORIG_HASH reference=$EXTRACTED_HASH"
echo "HASH MISMATCH (ref): $FILENAME"
echo "Original: $ORIG_HASH"
echo "Extracted: $EXTRACTED_HASH"
fi
else
echo "File not found in reference extraction: $FILENAME"
fi
done < /tmp/original-checksums.txt
if [ "$MATCH_COUNT" -eq "$EXPECTED_COUNT" ]; then
echo 'SUCCESS: Reference tar extraction hashes match original files!'
echo "SUCCESS: Reference tar extraction hashes match original files!"
else
echo "ERROR: Hash mismatch detected! Matched $MATCH_COUNT of $EXPECTED_COUNT files."
echo "ERROR: Only $MATCH_COUNT of $EXPECTED_COUNT hashes match"
return 1
fi
echo '=== VERIFYING SYMLINKS ==='
echo 'ORIGINAL SYMLINKS:'
# Test symlinks
echo "=== VERIFYING SYMLINKS ==="
echo "ORIGINAL SYMLINKS:"
find /tmp/standard-test -type l -exec ls -la {} \;
echo 'EXTRACTED SYMLINKS:'
echo "EXTRACTED SYMLINKS:"
find /tmp/standard-extracted -type l -exec ls -la {} \;
# Compare file counts to ensure all files were extracted
echo '=== FILE COUNT COMPARISON ==='
echo -n 'Original files: ' && find /tmp/standard-test -type f | wc -l
echo -n 'Extracted files: ' && find /tmp/standard-extracted -type f | wc -l
# Test symlink functionality
echo '=== TESTING SYMLINK CONTENT ==='
echo 'Original linked content:'
# Verify file counts
echo "=== FILE COUNT COMPARISON ==="
echo "Original files: $(find /tmp/standard-test -type f | wc -l)"
echo "Extracted files: $(find /tmp/standard-extracted -type f | wc -l)"
# Test symlink content
echo "=== TESTING SYMLINK CONTENT ==="
echo "Original linked content:"
cat /tmp/standard-test/data/config-link.json
echo 'Extracted linked content:'
echo "Extracted linked content:"
cat /tmp/standard-extracted/app/data/config-link.json
echo 'Tar comparison test completed successfully!'
echo "Tar comparison test completed successfully!"
return 0
}
# Main script execution
# Main script logic
echo "=== RUNNING ALL TESTS ==="
case "$TEST_TYPE" in
"basic")
"basic")
run_basic_test
RESULT=$?
[ "$RESULT" -eq 0 ] && cleanup_files
exit $RESULT
;;
"tar")
"tar")
run_tar_comparison_test
RESULT=$?
[ "$RESULT" -eq 0 ] && cleanup_files
exit $RESULT
;;
"all")
echo "=== RUNNING ALL TESTS ==="
run_basic_test
BASIC_RESULT=$?
run_tar_comparison_test
TAR_RESULT=$?
if [ $BASIC_RESULT -eq 0 ] && [ $TAR_RESULT -eq 0 ]; then
echo "✅ ALL TESTS PASSED SUCCESSFULLY!"
cleanup_files
exit 0
else
echo "❌ SOME TESTS FAILED!"
exit 1
fi
"exclude")
run_exclude_patterns_test
RESULT=$?
;;
"clean")
cleanup_files
exit 0
RESULT=0
;;
*)
echo "Unknown test type: $TEST_TYPE"
echo "Usage: $0 [basic|tar|all|clean] [keep_temp_files]"
echo " keep_temp_files: 0 (clean up, default) or 1 (keep temp files)"
exit 1
"all"|*)
run_basic_test
RESULT1=$?
run_tar_comparison_test
RESULT2=$?
run_exclude_patterns_test
RESULT3=$?
RESULT=$((RESULT1 + RESULT2 + RESULT3))
;;
esac
esac
if [ "$TEST_TYPE" != "clean" ]; then
cleanup_files
fi
if [ "$RESULT" -eq 0 ]; then
echo "✅ ALL TESTS PASSED SUCCESSFULLY!"
exit 0
else
echo "❌ TESTS FAILED WITH ERRORS!"
exit 1
fi