// Package main implements a small command-line tool that packs a directory
// into a gzip-compressed tarball together with an MD5 manifest, and that can
// later verify or extract such a tarball.
package main

import (
	"archive/tar"
	"bufio"
	"compress/gzip"
	"crypto/md5"
	"encoding/hex"
	"flag"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"sort"
	"strings"
	"time"
)

// manifestFilename is the name of the top-level archive entry that holds one
// "<md5-hex> <path>" line per regular file stored in the tarball.
const manifestFilename = ".md5-manifest.txt"

// main parses the command-line flags and dispatches to createTarball or
// extractTarball. It exits with status 1 on any error.
func main() {
	sourceDir := flag.String("source", "", "Source directory to compress")
	outputFile := flag.String("output", "output.tar.gz", "Output tarball filename")
	prefixDir := flag.String("prefix", "myapp", "Directory prefix in tarball")
	extractMode := flag.Bool("extract", false, "Extract mode (instead of create)")
	extractDir := flag.String("extractdir", "", "Directory to extract to (default: current directory)")
	verifyOnly := flag.Bool("verify", false, "Only verify hash integrity without extraction")
	verboseMode := flag.Bool("verbose", false, "Enable verbose output")
	flag.Parse()

	if *extractMode {
		// In extract mode -output names the tarball to read. When it was left
		// at its default, the first positional argument takes precedence.
		if *outputFile == "output.tar.gz" && flag.NArg() > 0 {
			*outputFile = flag.Arg(0)
		}
		if *outputFile == "" {
			fmt.Println("Please specify a tarball to extract using -output or as a positional argument")
			flag.Usage()
			os.Exit(1)
		}

		extractTo := *extractDir
		if extractTo == "" {
			extractTo = "."
		}

		if err := extractTarball(*outputFile, extractTo, *verifyOnly, *verboseMode); err != nil {
			fmt.Printf("Error extracting tarball: %v\n", err)
			os.Exit(1)
		}
		return
	}

	if *sourceDir == "" {
		fmt.Println("Please specify a source directory using -source")
		flag.Usage()
		os.Exit(1)
	}

	if err := createTarball(*sourceDir, *outputFile, *prefixDir, *verboseMode); err != nil {
		fmt.Printf("Error creating tarball: %v\n", err)
		os.Exit(1)
	}
	fmt.Printf("Successfully created %s with prefix %s\n", *outputFile, *prefixDir)
}

// calcFileMD5 returns the lowercase hex MD5 digest of the file at filePath.
func calcFileMD5(filePath string) (string, error) {
	file, err := os.Open(filePath)
	if err != nil {
		return "", err
	}
	defer file.Close()

	hash := md5.New()
	if _, err := io.Copy(hash, file); err != nil {
		return "", err
	}
	return hex.EncodeToString(hash.Sum(nil)), nil
}

// createTarball writes the contents of sourceDir into the gzip-compressed
// tarball outputFile. Every entry name is placed under prefix (when non-empty)
// and a manifest entry named manifestFilename, listing the MD5 of each regular
// file, is appended as the last entry.
//
// Directories and symlinks are stored but not hashed. Sockets, FIFOs and
// devices are skipped. A manifest file already present at the top of sourceDir
// and the output file itself (when it lives inside sourceDir) are not
// archived. If an error occurs the partially written output file is removed.
func createTarball(sourceDir, outputFile, prefix string, verboseMode bool) (err error) {
	absSourceDir, err := filepath.Abs(sourceDir)
	if err != nil {
		return err
	}

	// filepath.Walk does not descend into a root that is itself a symlink, so
	// resolve it first; this also reports a missing source before any output
	// file is created.
	walkRoot, err := filepath.EvalSymlinks(absSourceDir)
	if err != nil {
		return err
	}
	rootInfo, err := os.Stat(walkRoot)
	if err != nil {
		return err
	}
	if !rootInfo.IsDir() {
		return fmt.Errorf("source %q is not a directory", sourceDir)
	}

	if verboseMode {
		fmt.Printf("Creating tarball from directory: %s\n", absSourceDir)
		fmt.Printf("Output file: %s\n", outputFile)
		if prefix != "" {
			fmt.Printf("Using prefix: %s\n", prefix)
		}
	}

	out, err := os.Create(outputFile)
	if err != nil {
		return err
	}
	defer func() {
		// A failed Close can mean lost data, so surface it unless an earlier
		// error is already being returned.
		if closeErr := out.Close(); err == nil {
			err = closeErr
		}
		if err != nil {
			// Best effort: do not leave a truncated archive behind.
			_ = os.Remove(outputFile)
		}
	}()

	// Used to recognise the output file if the walk encounters it.
	outInfo, err := out.Stat()
	if err != nil {
		return err
	}

	if verboseMode {
		fmt.Println("Creating compressed archive...")
	}
	gw := gzip.NewWriter(out)
	tw := tar.NewWriter(gw)

	hashes := make(map[string]string)
	entryCount := 0

	err = filepath.Walk(walkRoot, func(path string, info os.FileInfo, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}

		relPath, err := filepath.Rel(walkRoot, path)
		if err != nil {
			return err
		}
		if relPath == "." {
			return nil
		}

		mode := info.Mode()
		isSymlink := mode&os.ModeSymlink != 0
		if !isSymlink && !info.IsDir() && !mode.IsRegular() {
			// Opening a FIFO would block and sockets cannot be archived.
			fmt.Printf("Skipping unsupported file type: %s\n", path)
			return nil
		}
		if os.SameFile(info, outInfo) {
			if verboseMode {
				fmt.Printf("Skipping output file: %s\n", path)
			}
			return nil
		}
		if mode.IsRegular() && relPath == manifestFilename {
			if verboseMode {
				fmt.Printf("Skipping existing manifest file: %s\n", path)
			}
			return nil
		}

		linkTarget := ""
		if isSymlink {
			if linkTarget, err = os.Readlink(path); err != nil {
				return err
			}
		}

		header, err := tar.FileInfoHeader(info, linkTarget)
		if err != nil {
			return err
		}
		name := relPath
		if prefix != "" {
			name = filepath.Join(prefix, relPath)
		}
		header.Name = filepath.ToSlash(name)

		entryCount++
		if verboseMode && entryCount%100 == 0 {
			fmt.Printf("Processed %d files...\n", entryCount)
		}

		switch {
		case isSymlink:
			if verboseMode {
				fmt.Printf("Adding symlink: %s -> %s\n", header.Name, linkTarget)
			}
			return tw.WriteHeader(header)
		case info.IsDir():
			// Directory entries conventionally end in a slash.
			header.Name += "/"
			if verboseMode {
				fmt.Printf("Adding directory: %s\n", header.Name)
			}
			return tw.WriteHeader(header)
		}

		if verboseMode {
			fmt.Printf("Adding file: %s\n", header.Name)
		}
		sum, err := addRegularFile(tw, path, header)
		if err != nil {
			return err
		}
		hashes[header.Name] = sum
		return nil
	})
	if err != nil {
		return err
	}

	if verboseMode {
		fmt.Printf("Added %d files to tarball\n", entryCount)
		fmt.Println("Creating MD5 manifest...")
	}

	// Map iteration order is random; sort so identical inputs always produce
	// an identical manifest.
	paths := make([]string, 0, len(hashes))
	for p := range hashes {
		paths = append(paths, p)
	}
	sort.Strings(paths)

	var manifest strings.Builder
	for _, p := range paths {
		fmt.Fprintf(&manifest, "%s %s\n", hashes[p], p)
	}

	manifestHeader := &tar.Header{
		Typeflag: tar.TypeReg,
		Name:     manifestFilename,
		Mode:     0644,
		Size:     int64(manifest.Len()),
		ModTime:  time.Now(),
	}
	if verboseMode {
		fmt.Printf("Adding manifest with %d entries\n", len(hashes))
	}
	if err := tw.WriteHeader(manifestHeader); err != nil {
		return err
	}
	if _, err := io.WriteString(tw, manifest.String()); err != nil {
		return err
	}

	// Flush the tar trailer, then the gzip trailer; the deferred function
	// closes the underlying file.
	if err := tw.Close(); err != nil {
		return err
	}
	if err := gw.Close(); err != nil {
		return err
	}

	if verboseMode {
		fmt.Printf("Tarball created successfully: %s\n", outputFile)
	} else {
		fmt.Println("Tarball created successfully!")
	}
	return nil
}

// addRegularFile writes header followed by the content of the file at path to
// tw and returns the hex MD5 digest of the bytes that were written.
func addRegularFile(tw *tar.Writer, path string, header *tar.Header) (string, error) {
	// Open before writing the header so an unreadable file does not leave a
	// header without a body in the archive.
	file, err := os.Open(path)
	if err != nil {
		return "", err
	}
	defer file.Close()

	if err := tw.WriteHeader(header); err != nil {
		return "", err
	}

	h := md5.New()
	if _, err := io.Copy(io.MultiWriter(tw, h), file); err != nil {
		return "", err
	}
	return hex.EncodeToString(h.Sum(nil)), nil
}

// extractTarball verifies the gzip-compressed tarball at tarballPath against
// its embedded MD5 manifest and, unless verifyOnly is set, extracts it into
// extractDir (which is created if necessary).
//
// The archive is read in a single pass. In verify-only mode file contents are
// hashed and discarded, so nothing is written to disk. Otherwise files are
// staged in a temporary directory inside extractDir and only moved into place
// once every hash matched; existing top-level items with the same name are
// replaced. The manifest itself is never extracted.
//
// An error is returned when the archive cannot be read, contains no manifest,
// contains an entry that would be written outside the staging directory, or
// when any file's MD5 differs from the manifest.
func extractTarball(tarballPath, extractDir string, verifyOnly, verboseMode bool) error {
	if verboseMode {
		fmt.Printf("Opening tarball: %s\n", tarballPath)
	}
	file, err := os.Open(tarballPath)
	if err != nil {
		return err
	}
	defer file.Close()

	gr, err := gzip.NewReader(file)
	if err != nil {
		return err
	}
	defer gr.Close()

	// An empty stagingDir tells readArchive to hash without writing.
	stagingDir := ""
	if !verifyOnly {
		// The staging directory lives inside extractDir so the final move is
		// normally a rename on the same filesystem.
		if err := os.MkdirAll(extractDir, 0755); err != nil {
			return err
		}
		if verboseMode {
			fmt.Printf("Creating temporary extraction directory in: %s\n", extractDir)
		}
		stagingDir, err = os.MkdirTemp(extractDir, "tarballer-extract-")
		if err != nil {
			return err
		}
		defer os.RemoveAll(stagingDir)
	}

	expected, actual, err := readArchive(tar.NewReader(gr), stagingDir, verboseMode)
	if err != nil {
		return err
	}
	// nil means no manifest entry was seen; an empty map is a valid manifest
	// of an archive without regular files.
	if expected == nil {
		return fmt.Errorf("no MD5 manifest found in tarball")
	}

	if verboseMode {
		fmt.Println("Verifying file integrity...")
	}
	if !verifyHashes(expected, actual, verboseMode) {
		return fmt.Errorf("hash verification failed for one or more files")
	}
	if verifyOnly {
		fmt.Println("All files verified successfully!")
		return nil
	}

	if verboseMode {
		fmt.Println("Moving verified files to final destination...")
	}
	movedCount, err := moveExtracted(stagingDir, extractDir, verboseMode)
	if err != nil {
		return err
	}

	if verboseMode {
		fmt.Printf("Extraction complete: %d files extracted and verified, %d files moved to final destination\n", len(actual), movedCount)
	} else {
		fmt.Println("Extraction completed and verified successfully!")
	}
	return nil
}

// isManifestName reports whether name refers to the manifest entry at the top
// level of the archive.
func isManifestName(name string) bool {
	return filepath.Base(name) == manifestFilename && filepath.Dir(name) == "."
}

// readArchive consumes every entry of tr. It returns the hashes listed in the
// manifest (nil when the archive has no manifest) and the MD5 of every regular
// file actually present, keyed by entry name. When destDir is non-empty,
// directories, symlinks and regular files are also created beneath it.
func readArchive(tr *tar.Reader, destDir string, verboseMode bool) (map[string]string, map[string]string, error) {
	var expected map[string]string
	actual := make(map[string]string)
	entryCount := 0

	for {
		header, err := tr.Next()
		if err == io.EOF {
			return expected, actual, nil
		}
		if err != nil {
			return nil, nil, err
		}

		entryCount++
		if verboseMode && entryCount%100 == 0 {
			fmt.Printf("Processed %d entries...\n", entryCount)
		}

		if header.Typeflag == tar.TypeReg && isManifestName(header.Name) {
			if verboseMode {
				fmt.Println("Found manifest file, parsing hashes...")
			}
			entries, err := parseManifest(tr)
			if err != nil {
				return nil, nil, err
			}
			if verboseMode {
				fmt.Printf("Parsed %d hash entries from manifest\n", len(entries))
			}
			if expected == nil {
				expected = entries
			} else {
				for p, sum := range entries {
					expected[p] = sum
				}
			}
			continue
		}

		target := ""
		if destDir != "" {
			if target, err = safeTarget(destDir, header.Name); err != nil {
				return nil, nil, err
			}
		}

		switch header.Typeflag {
		case tar.TypeDir:
			if target == "" {
				continue
			}
			if verboseMode {
				fmt.Printf("Creating directory: %s\n", header.Name)
			}
			if err := os.MkdirAll(target, 0755); err != nil {
				return nil, nil, err
			}

		case tar.TypeSymlink:
			if target == "" {
				continue
			}
			if verboseMode {
				fmt.Printf("Creating symlink: %s -> %s\n", target, header.Linkname)
			}
			if err := os.MkdirAll(filepath.Dir(target), 0755); err != nil {
				return nil, nil, err
			}
			if err := os.Symlink(header.Linkname, target); err != nil {
				return nil, nil, err
			}

		case tar.TypeReg:
			if verboseMode {
				if target == "" {
					fmt.Printf("Hashing: %s\n", header.Name)
				} else {
					fmt.Printf("Extracting: %s\n", header.Name)
				}
			}
			sum, err := extractRegularFile(tr, target, header.FileInfo().Mode().Perm())
			if err != nil {
				return nil, nil, err
			}
			actual[header.Name] = sum

		default:
			fmt.Printf("Skipping unsupported entry type %q: %s\n", header.Typeflag, header.Name)
		}
	}
}

// parseManifest reads "<hash> <path>" lines from r and returns them as a map
// from path to hash. Lines without a space separator are ignored.
func parseManifest(r io.Reader) (map[string]string, error) {
	entries := make(map[string]string)
	scanner := bufio.NewScanner(r)
	for scanner.Scan() {
		parts := strings.SplitN(scanner.Text(), " ", 2)
		if len(parts) == 2 {
			entries[parts[1]] = parts[0]
		}
	}
	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("error parsing manifest: %w", err)
	}
	return entries, nil
}

// safeTarget maps the archive entry name to a path beneath baseDir. It rejects
// names that climb out of baseDir with ".." and names whose parent path runs
// through a symlink that was extracted earlier, since either could cause a
// write outside the staging directory.
func safeTarget(baseDir, name string) (string, error) {
	rel := filepath.Clean(filepath.FromSlash(name))
	sep := string(filepath.Separator)
	if rel == ".." || strings.HasPrefix(rel, ".."+sep) || filepath.VolumeName(rel) != "" {
		return "", fmt.Errorf("unsafe path in tarball: %q", name)
	}

	// Check every parent component that already exists on disk.
	parts := strings.Split(rel, sep)
	current := baseDir
	for _, part := range parts[:len(parts)-1] {
		current = filepath.Join(current, part)
		info, err := os.Lstat(current)
		if os.IsNotExist(err) {
			break // nothing deeper can exist yet
		}
		if err != nil {
			return "", err
		}
		if info.Mode()&os.ModeSymlink != 0 {
			return "", fmt.Errorf("unsafe path in tarball: %q passes through symlink %q", name, current)
		}
	}
	return filepath.Join(baseDir, rel), nil
}

// extractRegularFile copies src to a new file at target created with perm and
// returns the hex MD5 of the copied bytes. With an empty target the content is
// only hashed.
func extractRegularFile(src io.Reader, target string, perm os.FileMode) (string, error) {
	h := md5.New()
	if target == "" {
		if _, err := io.Copy(h, src); err != nil {
			return "", err
		}
		return hex.EncodeToString(h.Sum(nil)), nil
	}

	if err := os.MkdirAll(filepath.Dir(target), 0755); err != nil {
		return "", err
	}
	// Drop whatever an earlier entry left at this path. Together with O_EXCL
	// this guarantees the write never follows a symlink placed there.
	if err := os.Remove(target); err != nil && !os.IsNotExist(err) {
		return "", err
	}
	f, err := os.OpenFile(target, os.O_CREATE|os.O_EXCL|os.O_WRONLY, perm)
	if err != nil {
		return "", err
	}
	if _, err := io.Copy(io.MultiWriter(f, h), src); err != nil {
		f.Close()
		return "", err
	}
	if err := f.Close(); err != nil {
		return "", err
	}
	return hex.EncodeToString(h.Sum(nil)), nil
}

// verifyHashes compares the hashes computed from the archive (actual) with
// those listed in the manifest (expected), printing every mismatch. Files
// absent from the manifest and manifest entries absent from the archive are
// reported as warnings only. It returns false if any hash differs.
func verifyHashes(expected, actual map[string]string, verboseMode bool) bool {
	ok := true
	for _, name := range sortedKeys(actual) {
		want, listed := expected[name]
		if !listed {
			if verboseMode {
				fmt.Printf("Warning: File %s not found in manifest\n", name)
			}
			continue
		}
		got := actual[name]
		if !strings.EqualFold(got, want) {
			fmt.Printf("Hash mismatch for %s: expected %s, got %s\n", name, want, got)
			ok = false
			continue
		}
		if verboseMode {
			fmt.Printf("Hash verified: %s\n", name)
		}
	}

	var missingFiles []string
	for _, name := range sortedKeys(expected) {
		if _, present := actual[name]; !present {
			missingFiles = append(missingFiles, name)
		}
	}
	if len(missingFiles) > 0 {
		fmt.Printf("Warning: %d files in manifest were not found in the tarball\n", len(missingFiles))
		maxExamples := 5
		if len(missingFiles) < maxExamples {
			maxExamples = len(missingFiles)
		}
		fmt.Println("Examples of missing files:")
		for _, name := range missingFiles[:maxExamples] {
			fmt.Printf(" - %s\n", name)
		}
		if len(missingFiles) > maxExamples {
			fmt.Printf(" - ...and %d more\n", len(missingFiles)-maxExamples)
		}
	}
	return ok
}

// sortedKeys returns the keys of m in ascending order.
func sortedKeys(m map[string]string) []string {
	keys := make([]string, 0, len(m))
	for k := range m {
		keys = append(keys, k)
	}
	sort.Strings(keys)
	return keys
}

// moveExtracted moves every top-level item of stagingDir into extractDir,
// replacing items of the same name, and returns how many were moved. When a
// rename fails (for example across devices) the item is copied instead.
func moveExtracted(stagingDir, extractDir string, verboseMode bool) (int, error) {
	items, err := os.ReadDir(stagingDir)
	if err != nil {
		return 0, err
	}

	moved := 0
	for _, item := range items {
		source := filepath.Join(stagingDir, item.Name())
		dest := filepath.Join(extractDir, item.Name())
		if verboseMode {
			fmt.Printf("Moving: %s -> %s\n", source, dest)
		}

		// Lstat so that a dangling symlink at dest is also detected.
		if _, err := os.Lstat(dest); err == nil {
			if err := os.RemoveAll(dest); err != nil {
				return 0, err
			}
		}

		if err := os.Rename(source, dest); err != nil {
			if verboseMode {
				fmt.Printf("Direct move failed, using recursive copy for: %s\n", item.Name())
			}
			if err := copyTree(source, dest); err != nil {
				return 0, err
			}
		}
		moved++
	}
	return moved, nil
}

// copyTree recursively copies source to dest, recreating directories, symlinks
// and regular files with their permission bits.
func copyTree(source, dest string) error {
	return filepath.Walk(source, func(path string, info os.FileInfo, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}
		relPath, err := filepath.Rel(source, path)
		if err != nil {
			return err
		}
		targetPath := filepath.Join(dest, relPath)

		if info.IsDir() {
			return os.MkdirAll(targetPath, info.Mode().Perm())
		}
		if info.Mode()&os.ModeSymlink != 0 {
			linkTarget, err := os.Readlink(path)
			if err != nil {
				return err
			}
			return os.Symlink(linkTarget, targetPath)
		}

		srcFile, err := os.Open(path)
		if err != nil {
			return err
		}
		defer srcFile.Close()

		dstFile, err := os.OpenFile(targetPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, info.Mode().Perm())
		if err != nil {
			return err
		}
		if _, err := io.Copy(dstFile, srcFile); err != nil {
			dstFile.Close()
			return err
		}
		// Close reports deferred write errors; do not drop it.
		return dstFile.Close()
	})
}