tarballer/main.go

599 lines
14 KiB
Go

package main
import (
"archive/tar"
"bufio"
"compress/gzip"
"crypto/md5"
"encoding/hex"
"flag"
"fmt"
"io"
"os"
"path/filepath"
"strings"
)
// manifestFilename is the name of the archive-root entry that createTarball
// appends to every tarball and extractTarball looks for when verifying. Each
// line of the manifest has the form "<md5-hex> <entry name>".
const manifestFilename = ".md5-manifest.txt"
func main() {
// Define command line flags
sourceDir := flag.String("source", "", "Source directory to compress")
outputFile := flag.String("output", "output.tar.gz", "Output tarball filename")
prefixDir := flag.String("prefix", "myapp", "Directory prefix in tarball")
extractMode := flag.Bool("extract", false, "Extract mode (instead of create)")
extractDir := flag.String("extractdir", "", "Directory to extract to (default: current directory)")
verifyOnly := flag.Bool("verify", false, "Only verify hash integrity without extraction")
flag.Parse()
if *extractMode {
if *outputFile == "output.tar.gz" && len(flag.Args()) > 0 {
*outputFile = flag.Args()[0]
}
if *outputFile == "" {
fmt.Println("Please specify a tarball to extract using -output or as a positional argument")
flag.Usage()
os.Exit(1)
}
// If extract directory is not specified, use current directory
extractTo := *extractDir
if extractTo == "" {
extractTo = "."
}
err := extractTarball(*outputFile, extractTo, *verifyOnly)
if err != nil {
fmt.Printf("Error extracting tarball: %v\n", err)
os.Exit(1)
}
} else {
if *sourceDir == "" {
fmt.Println("Please specify a source directory using -source")
flag.Usage()
os.Exit(1)
}
err := createTarball(*sourceDir, *outputFile, *prefixDir)
if err != nil {
fmt.Printf("Error creating tarball: %v\n", err)
os.Exit(1)
}
fmt.Printf("Successfully created %s with prefix %s\n", *outputFile, *prefixDir)
}
}
// calcFileMD5 streams the file at filePath through an MD5 hasher and returns
// the digest as a lowercase hexadecimal string. If the file cannot be opened
// or read, the empty string and the underlying error are returned.
func calcFileMD5(filePath string) (string, error) {
	src, openErr := os.Open(filePath)
	if openErr != nil {
		return "", openErr
	}
	defer src.Close()

	digest := md5.New()
	_, copyErr := io.Copy(digest, src)
	if copyErr != nil {
		return "", copyErr
	}

	sum := digest.Sum(nil)
	return hex.EncodeToString(sum), nil
}
// createTarball writes a gzip-compressed tar archive of sourceDir to
// outputFile. Every entry is stored under the prefix directory, and a
// manifest named manifestFilename is appended at the archive root listing the
// MD5 hash of each regular file as "<md5-hex> <entry name>" lines.
//
// Behaviour worth knowing:
//   - Entry names always use forward slashes, as the tar format requires.
//   - Symbolic links are stored as links (their targets are not followed).
//   - A top-level file named manifestFilename in sourceDir is skipped, and so
//     is outputFile itself when it lives inside sourceDir.
//   - Manifest lines appear in walk order (lexical), so the same tree always
//     produces the same manifest.
//   - Only regular files have their content archived; other special files
//     (for example FIFOs) are recorded as header-only entries so that opening
//     them cannot block.
//
// The returned error is the first failure encountered, including failures to
// flush and close the tar, gzip and file writers.
func createTarball(sourceDir, outputFile, prefix string) (err error) {
	// Resolve and check the source before touching the output, so a bad
	// source path does not leave behind an empty or truncated archive.
	sourceDir, err = filepath.Abs(sourceDir)
	if err != nil {
		return err
	}
	if _, err = os.Lstat(sourceDir); err != nil {
		return err
	}

	out, err := os.Create(outputFile)
	if err != nil {
		return err
	}
	gw := gzip.NewWriter(out)
	tw := tar.NewWriter(gw)
	defer func() {
		// Close innermost first so each layer flushes its trailer into the
		// next. A close failure means the archive is incomplete, so it must
		// be reported unless an earlier error is already being returned.
		for _, c := range []io.Closer{tw, gw, out} {
			if closeErr := c.Close(); closeErr != nil && err == nil {
				err = closeErr
			}
		}
	}()

	// Used to recognise the output file if the walk encounters it.
	outInfo, err := out.Stat()
	if err != nil {
		return err
	}

	type manifestEntry struct {
		hash string
		name string
	}
	var manifest []manifestEntry

	err = filepath.Walk(sourceDir, func(filePath string, info os.FileInfo, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}

		// Never archive the tarball into itself.
		if os.SameFile(info, outInfo) {
			return nil
		}

		relPath, err := filepath.Rel(sourceDir, filePath)
		if err != nil {
			return err
		}

		// Skip a manifest left in the source root (e.g. from an earlier run).
		if relPath == manifestFilename {
			return nil
		}

		// filepath.Walk does not follow symlinks, so info describes the link
		// itself; its target has to be read separately for the header.
		linkTarget := ""
		if info.Mode()&os.ModeSymlink != 0 {
			if linkTarget, err = os.Readlink(filePath); err != nil {
				return err
			}
		}

		header, err := tar.FileInfoHeader(info, linkTarget)
		if err != nil {
			return err
		}
		header.Name = filepath.ToSlash(filepath.Join(prefix, relPath))

		if err := tw.WriteHeader(header); err != nil {
			return err
		}

		// Directories, symlinks and special files carry no content.
		if !info.Mode().IsRegular() {
			return nil
		}

		hash, err := archiveFileContent(tw, filePath)
		if err != nil {
			return err
		}
		manifest = append(manifest, manifestEntry{hash: hash, name: header.Name})
		return nil
	})
	if err != nil {
		return err
	}

	var manifestContent strings.Builder
	for _, entry := range manifest {
		fmt.Fprintf(&manifestContent, "%s %s\n", entry.hash, entry.name)
	}

	manifestHeader := &tar.Header{
		Name:     manifestFilename,
		Mode:     0644,
		Size:     int64(manifestContent.Len()),
		Typeflag: tar.TypeReg,
	}
	if err := tw.WriteHeader(manifestHeader); err != nil {
		return err
	}
	if _, err := io.WriteString(tw, manifestContent.String()); err != nil {
		return err
	}
	return nil
}

// archiveFileContent streams the file at path into w and returns the
// hex-encoded MD5 digest of exactly the bytes that were written.
func archiveFileContent(w io.Writer, path string) (string, error) {
	file, err := os.Open(path)
	if err != nil {
		return "", err
	}
	defer file.Close()

	hasher := md5.New()
	if _, err := io.Copy(io.MultiWriter(w, hasher), file); err != nil {
		return "", err
	}
	return hex.EncodeToString(hasher.Sum(nil)), nil
}
// extractTarball verifies the gzip-compressed tar archive at tarballPath
// against its embedded MD5 manifest and, unless verifyOnly is set, unpacks it
// into extractDir.
//
// The archive is read in a single pass. Every non-directory, non-symlink
// entry is hashed while it is streamed; the manifest (an archive-root entry
// named manifestFilename) may appear anywhere in the archive, so hashes are
// compared only after the whole archive has been read.
//
// With verifyOnly set nothing is written to disk. Otherwise entries are first
// written to a temporary staging directory inside extractDir (which is
// created if necessary) and are moved into place only when every hash
// matches; an existing top-level item of the same name in extractDir is
// replaced. The staging directory is always removed before returning.
//
// Entries whose names would land outside the staging directory, or that
// would be written through a symlink extracted earlier, are rejected with an
// error because the archive is untrusted input.
//
// Files present in the archive but absent from the manifest only produce a
// warning. Manifest entries with no matching file produce a warning in
// verifyOnly mode. An error is returned when the archive cannot be read, no
// manifest is present, any hash differs, or a filesystem operation fails.
func extractTarball(tarballPath, extractDir string, verifyOnly bool) error {
	file, err := os.Open(tarballPath)
	if err != nil {
		return err
	}
	defer file.Close()

	gr, err := gzip.NewReader(file)
	if err != nil {
		return err
	}
	defer gr.Close()
	tr := tar.NewReader(gr)

	stagingDir := ""
	if !verifyOnly {
		// The staging directory lives inside extractDir, so extractDir has
		// to exist first.
		if err := os.MkdirAll(extractDir, 0755); err != nil {
			return err
		}
		stagingDir, err = os.MkdirTemp(extractDir, "tarballer-extract-")
		if err != nil {
			return err
		}
		defer os.RemoveAll(stagingDir)
	}

	expectedHashes := make(map[string]string) // entry name -> hash from the manifest
	actualHashes := make(map[string]string)   // entry name -> hash of the archived bytes
	var manifestOrder []string                // manifest names in file order, for stable output
	var archiveOrder []string                 // hashed entry names in archive order
	manifestFound := false

	for {
		header, err := tr.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			return err
		}

		isManifest := filepath.Base(header.Name) == manifestFilename && filepath.Dir(header.Name) == "."
		if isManifest && header.Typeflag != tar.TypeDir {
			names, err := readMD5Manifest(tr, expectedHashes)
			if err != nil {
				return fmt.Errorf("error parsing manifest: %w", err)
			}
			manifestOrder = append(manifestOrder, names...)
			manifestFound = true
			continue
		}

		// target stays empty in verify-only mode: nothing touches the disk.
		target := ""
		if !verifyOnly {
			if target, err = stagedEntryPath(stagingDir, header.Name); err != nil {
				return err
			}
		}

		switch header.Typeflag {
		case tar.TypeDir:
			if !verifyOnly {
				if err := os.MkdirAll(target, 0755); err != nil {
					return err
				}
			}
		case tar.TypeSymlink:
			if !verifyOnly {
				if err := stageSymlink(header.Linkname, target); err != nil {
					return err
				}
			}
		default:
			hash, err := stageAndHashFile(tr, target, os.FileMode(header.Mode).Perm())
			if err != nil {
				return err
			}
			if _, seen := actualHashes[header.Name]; !seen {
				archiveOrder = append(archiveOrder, header.Name)
			}
			actualHashes[header.Name] = hash
		}
	}

	// An empty manifest is valid (an archive with no regular files), so test
	// for presence rather than for a non-empty hash table.
	if !manifestFound {
		return fmt.Errorf("no MD5 manifest found in tarball")
	}

	verificationFailed := false
	for _, name := range archiveOrder {
		expectedHash, exists := expectedHashes[name]
		if !exists {
			fmt.Printf("Warning: File %s not found in manifest\n", name)
			continue
		}
		if actualHash := actualHashes[name]; actualHash != expectedHash {
			fmt.Printf("Hash mismatch for %s: expected %s, got %s\n", name, expectedHash, actualHash)
			verificationFailed = true
		}
	}

	if verifyOnly {
		reportMissingManifestEntries(manifestOrder, actualHashes)
	}

	if verificationFailed {
		return fmt.Errorf("hash verification failed for one or more files")
	}
	if verifyOnly {
		fmt.Println("All files verified successfully!")
		return nil
	}

	// Everything checked out: move the staged top-level items into place.
	items, err := os.ReadDir(stagingDir)
	if err != nil {
		return err
	}
	for _, item := range items {
		if item.Name() == manifestFilename {
			continue
		}
		source := filepath.Join(stagingDir, item.Name())
		dest := filepath.Join(extractDir, item.Name())

		// Replace whatever is already at the destination. Lstat is used so
		// that a dangling symlink is also detected and removed.
		if _, err := os.Lstat(dest); err == nil {
			if err := os.RemoveAll(dest); err != nil {
				return err
			}
		}

		if err := os.Rename(source, dest); err != nil {
			// Rename can fail (for example across devices): fall back to a
			// recursive copy.
			if err := copyStagedTree(source, dest); err != nil {
				return err
			}
		}
	}

	fmt.Println("Extraction completed and verified successfully!")
	return nil
}

// readMD5Manifest parses "<hash> <path>" lines from r into hashes and returns
// the paths that were not already present, in the order they were read.
// Lines without a space separator are ignored.
func readMD5Manifest(r io.Reader, hashes map[string]string) ([]string, error) {
	var added []string
	scanner := bufio.NewScanner(r)
	for scanner.Scan() {
		parts := strings.SplitN(scanner.Text(), " ", 2)
		if len(parts) != 2 {
			continue
		}
		hash, path := parts[0], parts[1]
		if _, known := hashes[path]; !known {
			added = append(added, path)
		}
		hashes[path] = hash
	}
	return added, scanner.Err()
}

// reportMissingManifestEntries prints a warning, with up to five examples,
// for manifest paths that have no hashed entry in the archive.
func reportMissingManifestEntries(manifestOrder []string, actualHashes map[string]string) {
	var missingFiles []string
	for _, path := range manifestOrder {
		if _, present := actualHashes[path]; !present {
			missingFiles = append(missingFiles, path)
		}
	}
	if len(missingFiles) == 0 {
		return
	}

	fmt.Printf("Warning: %d files in manifest were not found in the tarball\n", len(missingFiles))
	maxExamples := 5
	if len(missingFiles) < maxExamples {
		maxExamples = len(missingFiles)
	}
	fmt.Println("Examples of missing files:")
	for _, path := range missingFiles[:maxExamples] {
		fmt.Printf(" - %s\n", path)
	}
	if len(missingFiles) > maxExamples {
		fmt.Printf(" - ...and %d more\n", len(missingFiles)-maxExamples)
	}
}

// stagedEntryPath maps an archive entry name to a path inside stagingDir. It
// returns an error if the name would resolve outside stagingDir or if one of
// its already-existing parent components is a symlink.
func stagedEntryPath(stagingDir, entryName string) (string, error) {
	target := filepath.Join(stagingDir, entryName)
	rel, err := filepath.Rel(stagingDir, target)
	if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
		return "", fmt.Errorf("archive entry %q escapes the extraction directory", entryName)
	}

	// Refuse to write through a symlink that an earlier entry created;
	// otherwise "link -> /etc" followed by "link/passwd" would escape.
	parts := strings.Split(rel, string(filepath.Separator))
	current := stagingDir
	for _, part := range parts[:len(parts)-1] {
		current = filepath.Join(current, part)
		info, err := os.Lstat(current)
		if os.IsNotExist(err) {
			break
		}
		if err != nil {
			return "", err
		}
		if info.Mode()&os.ModeSymlink != 0 {
			return "", fmt.Errorf("archive entry %q would be written through a symlink", entryName)
		}
	}
	return target, nil
}

// prepareStagedTarget creates target's parent directory and removes any
// existing non-directory at target, so a duplicate entry can neither follow
// an earlier symlink nor trip over a read-only file.
func prepareStagedTarget(target string) error {
	if err := os.MkdirAll(filepath.Dir(target), 0755); err != nil {
		return err
	}
	if info, err := os.Lstat(target); err == nil && !info.IsDir() {
		return os.Remove(target)
	}
	return nil
}

// stageSymlink creates a symlink at target pointing to linkname.
func stageSymlink(linkname, target string) error {
	if err := prepareStagedTarget(target); err != nil {
		return err
	}
	return os.Symlink(linkname, target)
}

// stageAndHashFile reads src to the end and returns the hex-encoded MD5 of
// its bytes. When target is non-empty the bytes are also written to a new
// file at target with permissions perm.
func stageAndHashFile(src io.Reader, target string, perm os.FileMode) (string, error) {
	hasher := md5.New()
	if target == "" {
		if _, err := io.Copy(hasher, src); err != nil {
			return "", err
		}
		return hex.EncodeToString(hasher.Sum(nil)), nil
	}

	if err := prepareStagedTarget(target); err != nil {
		return "", err
	}
	out, err := os.OpenFile(target, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, perm)
	if err != nil {
		return "", err
	}
	_, copyErr := io.Copy(io.MultiWriter(out, hasher), src)
	closeErr := out.Close()
	if copyErr != nil {
		return "", copyErr
	}
	if closeErr != nil {
		return "", closeErr
	}
	return hex.EncodeToString(hasher.Sum(nil)), nil
}

// copyStagedTree recursively copies source to dest, recreating directories,
// symlinks and regular files. It is the fallback for a failed rename.
func copyStagedTree(source, dest string) error {
	return filepath.Walk(source, func(path string, info os.FileInfo, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}
		relPath, err := filepath.Rel(source, path)
		if err != nil {
			return err
		}
		targetPath := filepath.Join(dest, relPath)

		if info.IsDir() {
			return os.MkdirAll(targetPath, info.Mode().Perm())
		}
		if info.Mode()&os.ModeSymlink != 0 {
			linkTarget, err := os.Readlink(path)
			if err != nil {
				return err
			}
			return os.Symlink(linkTarget, targetPath)
		}

		srcFile, err := os.Open(path)
		if err != nil {
			return err
		}
		defer srcFile.Close()

		dstFile, err := os.OpenFile(targetPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, info.Mode().Perm())
		if err != nil {
			return err
		}
		_, copyErr := io.Copy(dstFile, srcFile)
		closeErr := dstFile.Close()
		if copyErr != nil {
			return copyErr
		}
		return closeErr
	})
}