Store MD5 manifest as hidden file at root of extraction directory

This commit is contained in:
Leopere 2025-03-20 18:41:10 -04:00
parent b6c808ed75
commit fa02d33791
6 changed files with 576 additions and 34 deletions

View File

@ -11,6 +11,7 @@ A simple utility to create tarballs with a specific directory structure.
- Cross-platform compatibility (FreeBSD, macOS, Linux)
- Produces tarballs compatible with standard tar tools
- Verified data integrity with MD5 hash comparison
- Automatically generates and verifies MD5 hashes of all files during extraction
## Building
@ -51,27 +52,47 @@ The tests will:
The usage is the same for all binaries:
```bash
# Create a tarball
./bin/tarballer-<platform> -source /path/to/directory -output myarchive.tar.gz -prefix myprefix
# Extract a tarball with integrity verification
./bin/tarballer-<platform> -extract -output myarchive.tar.gz -extractdir /path/to/extract
```
### Options
### Create Mode Options
- `-source`: The directory you want to compress (required)
- `-output`: The name of the output tarball (defaults to "output.tar.gz")
- `-prefix`: The directory name that will contain all files in the tarball (defaults to "myapp")
### Example
### Extract Mode Options
- `-extract`: Enables extraction mode
- `-output`: The tarball to extract (required)
- `-extractdir`: Directory to extract to (defaults to current directory)
- `-verify`: Only verify hash integrity without extraction
- `-keepmanifest`: Keep the MD5 manifest file after extraction (defaults to removing it)
### Examples
```bash
# On macOS:
# Create a tarball (on macOS):
./bin/tarballer-darwin -source ./myproject -output release.tar.gz -prefix app
# On FreeBSD:
./bin/tarballer-freebsd -source ./myproject -output release.tar.gz -prefix app
# Extract and verify (on Linux):
./bin/tarballer-linux -extract -output release.tar.gz -extractdir /path/to/extract
# Only verify hash integrity without extraction:
./bin/tarballer-linux -extract -verify -output release.tar.gz
```
When extracted, all files will be under the `app/` directory in the tarball and can be extracted with standard tools like:
## MD5 Hash Verification
```bash
tar -xzf release.tar.gz -C /path/to/extract
```
Tarballer includes built-in file integrity protection:
1. When creating a tarball, MD5 hashes are calculated for all regular files (symlinks are not hashed) and stored in a hidden manifest file (`.md5-manifest.txt`) at the root of the archive, so it lands at the root of the extraction directory
2. During extraction, hashes are verified to ensure files haven't been corrupted or tampered with
3. The manifest file is automatically removed after extraction unless `-keepmanifest` is specified
4. If any file fails verification, the extraction is aborted with an error
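This provides an extra layer of data integrity validation compared to standard tar tools. Note that MD5 is not collision-resistant and the manifest travels inside the archive itself, so this check detects accidental corruption rather than deliberate tampering.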

Binary file not shown.

Binary file not shown.

Binary file not shown.

473
main.go
View File

@ -2,34 +2,84 @@ package main
import (
"archive/tar"
"bufio"
"compress/gzip"
"crypto/md5"
"encoding/hex"
"flag"
"fmt"
"io"
"os"
"path/filepath"
"strings"
)
const manifestFilename = ".md5-manifest.txt"
func main() {
// Define command line flags
sourceDir := flag.String("source", "", "Source directory to compress")
outputFile := flag.String("output", "output.tar.gz", "Output tarball filename")
prefixDir := flag.String("prefix", "myapp", "Directory prefix in tarball")
extractMode := flag.Bool("extract", false, "Extract mode (instead of create)")
extractDir := flag.String("extractdir", "", "Directory to extract to (default: current directory)")
verifyOnly := flag.Bool("verify", false, "Only verify hash integrity without extraction")
keepManifest := flag.Bool("keepmanifest", false, "Keep the MD5 manifest file after extraction")
flag.Parse()
if *sourceDir == "" {
fmt.Println("Please specify a source directory using -source")
flag.Usage()
os.Exit(1)
}
if *extractMode {
if *outputFile == "output.tar.gz" && len(flag.Args()) > 0 {
*outputFile = flag.Args()[0]
}
err := createTarball(*sourceDir, *outputFile, *prefixDir)
if *outputFile == "" {
fmt.Println("Please specify a tarball to extract using -output or as a positional argument")
flag.Usage()
os.Exit(1)
}
// If extract directory is not specified, use current directory
extractTo := *extractDir
if extractTo == "" {
extractTo = "."
}
err := extractTarball(*outputFile, extractTo, *verifyOnly, *keepManifest)
if err != nil {
fmt.Printf("Error extracting tarball: %v\n", err)
os.Exit(1)
}
} else {
if *sourceDir == "" {
fmt.Println("Please specify a source directory using -source")
flag.Usage()
os.Exit(1)
}
err := createTarball(*sourceDir, *outputFile, *prefixDir)
if err != nil {
fmt.Printf("Error creating tarball: %v\n", err)
os.Exit(1)
}
fmt.Printf("Successfully created %s with prefix %s\n", *outputFile, *prefixDir)
}
}
// calcFileMD5 returns the hex-encoded MD5 digest of the file at filePath.
//
// The file is streamed through the hash with io.Copy, so it is never loaded
// into memory as a whole. An error is returned if the file cannot be opened
// or read; in that case the returned string is empty.
func calcFileMD5(filePath string) (string, error) {
	file, err := os.Open(filePath)
	if err != nil {
		return "", err
	}
	defer file.Close()

	hash := md5.New()
	if _, err := io.Copy(hash, file); err != nil {
		return "", err
	}

	return hex.EncodeToString(hash.Sum(nil)), nil
}
func createTarball(sourceDir, outputFile, prefix string) error {
@ -48,6 +98,9 @@ func createTarball(sourceDir, outputFile, prefix string) error {
tw := tar.NewWriter(gw)
defer tw.Close()
// Create a map to store MD5 hashes
fileHashes := make(map[string]string)
// Resolve absolute source path to handle relative symlinks correctly
sourceDir, err = filepath.Abs(sourceDir)
if err != nil {
@ -66,6 +119,11 @@ func createTarball(sourceDir, outputFile, prefix string) error {
return err
}
// Skip the manifest file if it exists (from a previous run)
if relPath == manifestFilename {
return nil
}
// Create tar header using original file info
header, err := tar.FileInfoHeader(info, "")
if err != nil {
@ -88,6 +146,15 @@ func createTarball(sourceDir, outputFile, prefix string) error {
// Make sure the link type is set correctly
header.Typeflag = tar.TypeSymlink
// For symlinks, we don't calculate MD5 hashes
} else if !info.IsDir() {
// Calculate MD5 hash for regular files
hash, err := calcFileMD5(filePath)
if err != nil {
return err
}
fileHashes[filepath.Join(prefix, relPath)] = hash
}
// Write header
@ -112,5 +179,391 @@ func createTarball(sourceDir, outputFile, prefix string) error {
return nil
})
return err
if err != nil {
return err
}
// Create and add the manifest file
var manifestContent strings.Builder
for path, hash := range fileHashes {
manifestContent.WriteString(fmt.Sprintf("%s %s\n", hash, path))
}
// Create a tar header for the manifest
manifestHeader := &tar.Header{
Name: manifestFilename,
Mode: 0644,
Size: int64(manifestContent.Len()),
Typeflag: tar.TypeReg,
}
// Write the manifest header
if err := tw.WriteHeader(manifestHeader); err != nil {
return err
}
// Write the manifest content
if _, err := tw.Write([]byte(manifestContent.String())); err != nil {
return err
}
return nil
}
// extractTarball verifies the gzip-compressed tarball at tarballPath against
// its embedded MD5 manifest and, unless verifyOnly is set, extracts it into
// extractDir.
//
// The archive is read in a single streaming pass. The manifest may appear
// anywhere in the archive, so the digest of every regular file is computed
// while the entry is read and the comparison against the manifest happens
// after the last entry. In extract mode the entries are written to a staging
// directory inside extractDir and are only moved into place once every hash
// matched, so a failed verification leaves extractDir untouched.
//
// Parameters:
//   - tarballPath: path of the .tar.gz file to read.
//   - extractDir: destination directory; created if missing (extract mode only).
//   - verifyOnly: when true nothing is written to disk, only hashes are checked.
//   - keepManifest: when true the manifest file is written to the root of
//     extractDir alongside the extracted files.
//
// An error is returned when the archive cannot be read, contains an entry
// whose name escapes the extraction directory, has no manifest, or when at
// least one file's hash differs from the manifest. Files missing from either
// the manifest or the archive are only reported as warnings.
func extractTarball(tarballPath, extractDir string, verifyOnly, keepManifest bool) error {
	// Open the tarball
	file, err := os.Open(tarballPath)
	if err != nil {
		return err
	}
	defer file.Close()

	// Create gzip reader
	gr, err := gzip.NewReader(file)
	if err != nil {
		return err
	}
	defer gr.Close()

	// Create tar reader
	tr := tar.NewReader(gr)

	// In extract mode, stage everything in a temporary directory next to the
	// final location so the final move is a cheap rename. The destination
	// must exist before MkdirTemp can create a directory inside it.
	stageDir := ""
	if !verifyOnly {
		if err := os.MkdirAll(extractDir, 0755); err != nil {
			return err
		}
		stageDir, err = os.MkdirTemp(extractDir, "tarballer-extract-")
		if err != nil {
			return err
		}
		// Whatever is still staged when we return (on success: nothing) is removed.
		defer os.RemoveAll(stageDir)
	}

	type pendingLink struct{ target, linkname string }
	var (
		manifestFound bool
		manifestData  []byte
		manifestPaths []string      // manifest entries in file order, for stable output
		fileNames     []string      // regular files in archive order, for stable output
		links         []pendingLink // symlinks, created only after all files are written
	)
	expectedHashes := make(map[string]string)
	actualHashes := make(map[string]string)

	for {
		header, err := tr.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			return err
		}

		// Directories carry no content to hash; in extract mode create them
		// so that empty directories survive the round trip.
		if header.Typeflag == tar.TypeDir {
			if !verifyOnly {
				target, err := stagedPath(stageDir, header.Name)
				if err != nil {
					return err
				}
				if err := os.MkdirAll(target, 0755); err != nil {
					return err
				}
			}
			continue
		}

		// The manifest lives at the root of the archive.
		if filepath.Base(header.Name) == manifestFilename && filepath.Dir(header.Name) == "." {
			manifestData, err = io.ReadAll(tr)
			if err != nil {
				return err
			}
			manifestFound = true

			// Each line is "<hash> <path>"; the path may itself contain spaces.
			scanner := bufio.NewScanner(strings.NewReader(string(manifestData)))
			for scanner.Scan() {
				parts := strings.SplitN(scanner.Text(), " ", 2)
				if len(parts) != 2 {
					continue
				}
				if _, dup := expectedHashes[parts[1]]; !dup {
					manifestPaths = append(manifestPaths, parts[1])
				}
				expectedHashes[parts[1]] = parts[0]
			}
			if err := scanner.Err(); err != nil {
				return fmt.Errorf("error parsing manifest: %w", err)
			}
			continue
		}

		// Symlinks are not hashed. Their creation is deferred until every
		// regular file has been written so that no archive entry can be
		// written through a link pointing outside the staging directory.
		if header.Typeflag == tar.TypeSymlink {
			if !verifyOnly {
				target, err := stagedPath(stageDir, header.Name)
				if err != nil {
					return err
				}
				links = append(links, pendingLink{target: target, linkname: header.Linkname})
			}
			continue
		}

		// Everything else is treated as a regular file.
		target := ""
		if !verifyOnly {
			target, err = stagedPath(stageDir, header.Name)
			if err != nil {
				return err
			}
		}
		sum, err := hashTarEntry(tr, target, os.FileMode(header.Mode).Perm())
		if err != nil {
			return err
		}
		if _, seen := actualHashes[header.Name]; !seen {
			fileNames = append(fileNames, header.Name)
		}
		actualHashes[header.Name] = sum
	}

	// An empty manifest is valid (archive without regular files); a missing
	// one is not.
	if !manifestFound {
		return fmt.Errorf("no MD5 manifest found in tarball")
	}

	// Compare what was read against what the manifest promises.
	verificationFailed := false
	for _, name := range fileNames {
		expectedHash, exists := expectedHashes[name]
		if !exists {
			fmt.Printf("Warning: File %s not found in manifest\n", name)
			continue
		}
		if actualHash := actualHashes[name]; actualHash != expectedHash {
			fmt.Printf("Hash mismatch for %s: expected %s, got %s\n", name, expectedHash, actualHash)
			verificationFailed = true
		}
	}

	// Report manifest entries that have no counterpart in the archive.
	for _, path := range manifestPaths {
		if _, present := actualHashes[path]; !present {
			fmt.Printf("Warning: File %s in manifest was not found in tarball\n", path)
		}
	}

	if verificationFailed {
		return fmt.Errorf("hash verification failed for one or more files")
	}

	if verifyOnly {
		fmt.Println("All files verified successfully!")
		return nil
	}

	// All regular files are in place; now it is safe to create the symlinks.
	for _, link := range links {
		if err := os.MkdirAll(filepath.Dir(link.target), 0755); err != nil {
			return err
		}
		if err := os.Symlink(link.linkname, link.target); err != nil {
			return err
		}
	}

	// The manifest is only materialised when the caller asked to keep it.
	if keepManifest {
		if err := os.WriteFile(filepath.Join(stageDir, manifestFilename), manifestData, 0644); err != nil {
			return err
		}
	}

	// Move each top-level staged item to its final destination.
	entries, err := os.ReadDir(stageDir)
	if err != nil {
		return err
	}
	for _, entry := range entries {
		source := filepath.Join(stageDir, entry.Name())
		dest := filepath.Join(extractDir, entry.Name())

		// If destination already exists, remove it. Lstat (not Stat) so that
		// an existing dangling symlink is detected too.
		if _, err := os.Lstat(dest); err == nil {
			if err := os.RemoveAll(dest); err != nil {
				return err
			}
		}

		if err := os.Rename(source, dest); err != nil {
			// If rename fails (e.g., across devices), fall back to copying.
			if err := copyTree(source, dest); err != nil {
				return err
			}
		}
	}

	fmt.Println("Extraction completed and verified successfully!")
	return nil
}

// stagedPath joins the archive entry name onto root and rejects names that
// would resolve to a location outside root (for example "../../etc/passwd").
func stagedPath(root, name string) (string, error) {
	target := filepath.Join(root, name)
	rel, err := filepath.Rel(root, target)
	if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
		return "", fmt.Errorf("illegal path in tarball: %q", name)
	}
	return target, nil
}

// hashTarEntry consumes r and returns the hex-encoded MD5 digest of its
// content. When target is non-empty the content is also written to that path
// (created with perm, truncating any existing file, parents created as needed).
func hashTarEntry(r io.Reader, target string, perm os.FileMode) (string, error) {
	digest := md5.New()

	if target == "" {
		if _, err := io.Copy(digest, r); err != nil {
			return "", err
		}
		return hex.EncodeToString(digest.Sum(nil)), nil
	}

	if err := os.MkdirAll(filepath.Dir(target), 0755); err != nil {
		return "", err
	}
	f, err := os.OpenFile(target, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, perm)
	if err != nil {
		return "", err
	}
	// Hash exactly the bytes that are handed to the file.
	if _, err := io.Copy(io.MultiWriter(f, digest), r); err != nil {
		f.Close()
		return "", err
	}
	// A failed Close can mean the data never reached the disk.
	if err := f.Close(); err != nil {
		return "", err
	}
	return hex.EncodeToString(digest.Sum(nil)), nil
}

// copyTree recursively copies source (a file, symlink or directory tree) to
// dest, recreating symlinks instead of following them.
func copyTree(source, dest string) error {
	return filepath.Walk(source, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}

		relPath, err := filepath.Rel(source, path)
		if err != nil {
			return err
		}
		targetPath := filepath.Join(dest, relPath)

		if info.IsDir() {
			return os.MkdirAll(targetPath, info.Mode().Perm())
		}

		if info.Mode()&os.ModeSymlink != 0 {
			linkTarget, err := os.Readlink(path)
			if err != nil {
				return err
			}
			return os.Symlink(linkTarget, targetPath)
		}

		srcFile, err := os.Open(path)
		if err != nil {
			return err
		}
		defer srcFile.Close()

		dstFile, err := os.OpenFile(targetPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, info.Mode().Perm())
		if err != nil {
			return err
		}
		if _, err := io.Copy(dstFile, srcFile); err != nil {
			dstFile.Close()
			return err
		}
		return dstFile.Close()
	})
}

View File

@ -90,10 +90,37 @@ run_basic_test() {
# Verify hash comparison
echo '=== COMPARING FILE HASHES ==='
ORIG_HASHES=$(cat /test/complex-original-md5.txt | awk '{print $1}' | sort)
EXTR_HASHES=$(cat /test/complex-extracted-md5.txt | grep -v 'complex-app$' | awk '{print $1}' | sort)
if [ "$ORIG_HASHES" = "$EXTR_HASHES" ]; then
# Extract just file paths from original hashes
ORIG_FILES=$(cat /test/complex-original-md5.txt | awk '{print $2}' | sort)
# For each original file, check if its corresponding extracted file has the same hash
ALL_MATCH=1
for SOURCE_FILE in $ORIG_FILES; do
# Get the base file name
FILENAME=$(basename "$SOURCE_FILE")
# Find the corresponding hash from original file
ORIG_HASH=$(grep "$SOURCE_FILE" /test/complex-original-md5.txt | awk '{print $1}')
# Find the corresponding file in the extracted directory and get its hash
EXTRACTED_FILE=$(find /test/complex-extracted -name "$FILENAME" | head -1)
if [ -z "$EXTRACTED_FILE" ]; then
echo "ERROR: File $FILENAME not found in extracted directory"
ALL_MATCH=0
continue
fi
EXTRACTED_HASH=$(grep "$EXTRACTED_FILE" /test/complex-extracted-md5.txt | awk '{print $1}')
if [ "$ORIG_HASH" != "$EXTRACTED_HASH" ]; then
echo "ERROR: Hash mismatch for $FILENAME: original=$ORIG_HASH extracted=$EXTRACTED_HASH"
ALL_MATCH=0
fi
done
if [ "$ALL_MATCH" -eq 1 ]; then
echo 'SUCCESS: All file hashes match between original and extracted files!'
else
echo 'ERROR: Hash mismatch detected!'
@ -165,24 +192,65 @@ run_tar_comparison_test() {
# Compare MD5 checksums systematically
echo '=== SYSTEMATIC MD5 COMPARISON ==='
# Extract just the hash part from each file
ORIG_HASHES=$(cat /test/original-checksums.txt | awk '{print $1}' | sort)
TARB_HASHES=$(cat /test/standard-checksums.txt | awk '{print $1}' | sort)
REF_HASHES=$(cat /test/reference-checksums.txt | awk '{print $1}' | sort)
# Compare original to tarballer extraction
if [ "$ORIG_HASHES" = "$TARB_HASHES" ]; then
# Compare original files to tarballer extraction
MATCH_COUNT=0
EXPECTED_COUNT=$(cat /test/original-checksums.txt | wc -l)
# For each original file, find its corresponding extracted file and compare hashes
while read -r line; do
ORIG_HASH=$(echo "$line" | awk '{print $1}')
ORIG_FILE=$(echo "$line" | awk '{print $2}')
FILENAME=$(basename "$ORIG_FILE")
# Find the corresponding file in the extracted directory
EXTRACTED_FILE=$(find /test/standard-extracted -name "$FILENAME" | grep -v ".md5-manifest.txt" | head -1)
if [ -n "$EXTRACTED_FILE" ]; then
# Get the hash of the extracted file
EXTRACTED_HASH=$(grep "$EXTRACTED_FILE" /test/standard-checksums.txt | awk '{print $1}')
if [ "$ORIG_HASH" = "$EXTRACTED_HASH" ]; then
MATCH_COUNT=$((MATCH_COUNT + 1))
else
echo "Hash mismatch for $FILENAME: original=$ORIG_HASH extracted=$EXTRACTED_HASH"
fi
fi
done < /test/original-checksums.txt
if [ "$MATCH_COUNT" -eq "$EXPECTED_COUNT" ]; then
echo 'SUCCESS: Tarballer extraction hashes match original files!'
else
echo 'ERROR: Hash mismatch detected between original and tarballer extraction!'
echo "ERROR: Hash mismatch detected! Matched $MATCH_COUNT of $EXPECTED_COUNT files."
return 1
fi
# Compare original to reference tar extraction
if [ "$ORIG_HASHES" = "$REF_HASHES" ]; then
# Similar check for reference tar extraction
MATCH_COUNT=0
while read -r line; do
ORIG_HASH=$(echo "$line" | awk '{print $1}')
ORIG_FILE=$(echo "$line" | awk '{print $2}')
FILENAME=$(basename "$ORIG_FILE")
# Find the corresponding file in the extracted directory
EXTRACTED_FILE=$(find /test/reference-extracted -name "$FILENAME" | head -1)
if [ -n "$EXTRACTED_FILE" ]; then
# Get the hash of the extracted file
EXTRACTED_HASH=$(grep "$EXTRACTED_FILE" /test/reference-checksums.txt | awk '{print $1}')
if [ "$ORIG_HASH" = "$EXTRACTED_HASH" ]; then
MATCH_COUNT=$((MATCH_COUNT + 1))
else
echo "Hash mismatch for $FILENAME: original=$ORIG_HASH reference=$EXTRACTED_HASH"
fi
fi
done < /test/original-checksums.txt
if [ "$MATCH_COUNT" -eq "$EXPECTED_COUNT" ]; then
echo 'SUCCESS: Reference tar extraction hashes match original files!'
else
echo 'ERROR: Hash mismatch detected between original and reference tar extraction!'
echo "ERROR: Hash mismatch detected! Matched $MATCH_COUNT of $EXPECTED_COUNT files."
return 1
fi