Fix manifest verification to avoid false warnings

This commit is contained in:
Leopere 2025-03-20 22:18:21 -04:00
parent 353ead15ef
commit 9450ff041c
5 changed files with 84 additions and 67 deletions

View File

@ -1,102 +1,89 @@
# Tarballer
A simple utility to create tarballs with a specific directory structure.
A utility to create tarballs with a specific directory structure and built-in MD5 integrity verification.
## Features
- Creates compressed tar archives (.tar.gz) from a source directory
- Places all files under a specified prefix directory in the tarball
- Creates compressed tar archives (.tar.gz) with files under a specified prefix directory
- Preserves file permissions and directory structure
- Handles symbolic links correctly
- Cross-platform compatibility (FreeBSD, macOS, Linux)
- Produces tarballs compatible with standard tar tools
- Verifies data integrity with MD5 hash comparison
- Automatically generates and verifies MD5 hashes of all files during extraction
- Compatible with standard tar tools
- Built-in MD5 hash verification
- Automatic file integrity checks during extraction
## Building
This project includes Docker support to build binaries for different platforms:
```bash
docker compose up --build
```
This will create these binaries in the `./bin` directory:
- `tarballer-freebsd`: FreeBSD AMD64 compatible binary
- `tarballer-darwin`: macOS ARM64 compatible binary
- `tarballer-linux`: Linux AMD64 compatible binary
This builds the project and creates binaries in the `./bin` directory:
- `tarballer-freebsd` - FreeBSD AMD64
- `tarballer-darwin` - macOS ARM64
- `tarballer-linux` - Linux AMD64
## Testing
You can run the included tests to verify functionality:
```bash
# Run all tests
docker compose up --build
# Run specific test types
# Run specific test type
docker compose run tarballer /bin/test.sh basic
docker compose run tarballer /bin/test.sh tar
```
All tests run inside the container using its `/tmp` directory, ensuring no temporary files are written to the host filesystem.
All tests run in the container's `/tmp` directory with no files written to the host filesystem.
The tests verify:
1. Creating test directory structures with nested directories and symlinks
2. Creating tarballs from test directories
3. Extracting the tarballs
4. Verifying the contents and file structure, including symlinks
5. Checking that symlinks remain functional after extraction
6. Comparing output with standard tar tools to ensure compatibility
7. Verifying data integrity with MD5 hashing (original vs. extracted files)
Tests verify:
1. Creating and extracting tarballs with nested directories and symlinks
2. File content preservation and structure integrity
3. Symlink functionality after extraction
4. Compatibility with standard tar tools
5. MD5 hash verification
See the `test/README.md` for more details on the test process.
See `test/README.md` for detailed test information.
## Usage
The usage is the same for all binaries:
### Create Mode
```bash
# Create a tarball
./bin/tarballer-<platform> -source /path/to/directory -output myarchive.tar.gz -prefix myprefix
```
# Extract a tarball with integrity verification
Options:
- `-source`: Source directory to compress (required)
- `-output`: Output tarball filename (default: "output.tar.gz")
- `-prefix`: Directory name that will contain all files in the tarball (default: "myapp")
### Extract Mode
```bash
./bin/tarballer-<platform> -extract -output myarchive.tar.gz -extractdir /path/to/extract
```
### Create Mode Options
- `-source`: The directory you want to compress (required)
- `-output`: The name of the output tarball (defaults to "output.tar.gz")
- `-prefix`: The directory name that will contain all files in the tarball (defaults to "myapp")
### Extract Mode Options
Options:
- `-extract`: Enables extraction mode
- `-output`: The tarball to extract (required)
- `-extractdir`: Directory to extract to (defaults to current directory)
- `-output`: Tarball to extract (required)
- `-extractdir`: Extraction directory (default: current directory)
- `-verify`: Only verify hash integrity without extraction
### Examples
```bash
# Create a tarball (on macOS):
# Create a tarball (macOS)
./bin/tarballer-darwin -source ./myproject -output release.tar.gz -prefix app
# Extract and verify (on Linux):
# Extract and verify (Linux)
./bin/tarballer-linux -extract -output release.tar.gz -extractdir /path/to/extract
# Only verify hash integrity without extraction:
# Only verify integrity
./bin/tarballer-linux -extract -verify -output release.tar.gz
```
## MD5 Hash Verification
Tarballer includes built-in file integrity protection:
1. When creating a tarball, MD5 hashes are calculated for all files and stored in a hidden manifest file (`.md5-manifest.txt`) at the root of the extraction directory
2. During extraction, hashes are verified to ensure files haven't been corrupted or tampered with
3. The manifest file is automatically removed after extraction
4. If any file fails verification, the extraction is aborted with an error
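This provides an extra layer of security and data integrity validation compared to standard tar tools. Note that MD5 is not collision-resistant, so the check is reliable against accidental corruption but should not be relied on to detect deliberate tampering.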
1. During creation, MD5 hashes are calculated for all files and stored in `.md5-manifest.txt`
2. During extraction, file hashes are verified against the manifest
3. The manifest file is removed after successful extraction
4. Extraction aborts with an error if verification fails

Binary file not shown.

Binary file not shown.

Binary file not shown.

64
main.go
View File

@ -53,6 +53,7 @@ func main() {
fmt.Println("Please specify a source directory using -source")
flag.Usage()
os.Exit(1)
}
err := createTarball(*sourceDir, *outputFile, *prefixDir)
@ -146,33 +147,42 @@ func createTarball(sourceDir, outputFile, prefix string) error {
// Make sure the link type is set correctly
header.Typeflag = tar.TypeSymlink
// For symlinks, we don't calculate MD5 hashes
} else if !info.IsDir() {
// Calculate MD5 hash for regular files
hash, err := calcFileMD5(filePath)
if err != nil {
// Write header
if err := tw.WriteHeader(header); err != nil {
return err
}
// No content to write for symlinks
} else if !info.IsDir() {
// Regular file - write header first
if err := tw.WriteHeader(header); err != nil {
return err
}
fileHashes[filepath.Join(prefix, relPath)] = hash
}
// Write header
if err := tw.WriteHeader(header); err != nil {
return err
}
// If it's a file (not a directory or symlink), copy contents
if !info.IsDir() && info.Mode()&os.ModeSymlink == 0 {
// Open the file for reading
file, err := os.Open(filePath)
if err != nil {
return err
}
defer file.Close()
_, err = io.Copy(tw, file)
// Create a multiwriter to write to both the tar archive and MD5 hash
hashWriter := md5.New()
multiWriter := io.MultiWriter(tw, hashWriter)
// Copy file contents to both the tar archive and hash calculator
_, err = io.Copy(multiWriter, file)
file.Close() // Close file after reading
if err != nil {
return err
}
// Store the calculated hash in our map
hashString := hex.EncodeToString(hashWriter.Sum(nil))
fileHashes[header.Name] = hashString
} else {
// For directories, just write the header
if err := tw.WriteHeader(header); err != nil {
return err
}
}
return nil
@ -360,6 +370,7 @@ func extractTarball(tarballPath, extractDir string, verifyOnly bool) error {
// Second pass: Verify hashes
verificationFailed := false
fileVerified := make(map[string]bool)
missingFiles := []string{}
if verifyOnly {
// Extract to temp dir for verification
@ -456,7 +467,26 @@ func extractTarball(tarballPath, extractDir string, verifyOnly bool) error {
// Check if all files in the manifest were verified
for path := range expectedHashes {
if !fileVerified[path] {
fmt.Printf("Warning: File %s in manifest was not found in tarball\n", path)
missingFiles = append(missingFiles, path)
}
}
// Only report missing files in verify-only mode
if verifyOnly && len(missingFiles) > 0 {
fmt.Printf("Warning: %d files in manifest were not found in the tarball\n", len(missingFiles))
// Print the first few missing files (at most maxExamples) as examples
if len(missingFiles) > 0 {
maxExamples := 5
if len(missingFiles) < maxExamples {
maxExamples = len(missingFiles)
}
fmt.Println("Examples of missing files:")
for i := 0; i < maxExamples; i++ {
fmt.Printf(" - %s\n", missingFiles[i])
}
if len(missingFiles) > maxExamples {
fmt.Printf(" - ...and %d more\n", len(missingFiles)-maxExamples)
}
}
}