Initial commit: Disaster recovery CLI tool

A Go-based CLI tool for recovering servers from backups to new cloud VMs.

Features:
- Multi-cloud support: Exoscale, Cloudscale, Hetzner Cloud
- Backup sources: Local filesystem, Hetzner Storage Box
- 6-stage restore pipeline with /etc whitelist protection
- DNS migration with safety checks and auto-rollback
- Dry-run by default, requires --yes to execute
- Cloud-init for SSH key injection

Packages:
- cmd/recover-server: CLI commands (recover, migrate-dns, list, cleanup)
- internal/providers: Cloud provider implementations
- internal/backup: Backup source implementations
- internal/restore: 6-stage restore pipeline
- internal/dns: Exoscale DNS management
- internal/ui: Prompts, progress, dry-run display
- internal/config: Environment and host configuration

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Olaf Berberich
2025-12-08 00:31:27 +00:00
commit 29a2886402
26 changed files with 3826 additions and 0 deletions
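The restore pipeline in this diff is written against two small abstractions whose definitions are not shown below. As a reading aid, here is a sketch inferred from the call sites in internal/restore; the real declarations live in internal/backup and internal/providers and may differ:

    // Inferred from usage in internal/restore; not the actual source.
    type BackupSource interface {
        // SyncTo copies the named top-level directories (e.g. "root", "opt",
        // "etc") from the host's backup onto the SSH target, authenticating
        // with the private key at keyPath.
        SyncTo(ctx context.Context, host, sshTarget, keyPath string, dirs []string) error
    }

    type VM struct {
        PublicIP string // the pipeline only needs the address to build the SSH target
        // ... provider-specific fields
    }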

internal/restore/docker.go

@@ -0,0 +1,102 @@
package restore
import (
"context"
"fmt"
"path"
"strings"
"time"
)
// startDocker ensures Docker is running and starts compose stacks
func (p *Pipeline) startDocker(ctx context.Context) error {
// Ensure Docker is enabled and running
if err := p.remoteCmd(ctx, "systemctl enable docker"); err != nil {
return fmt.Errorf("failed to enable docker: %w", err)
}
if err := p.remoteCmd(ctx, "systemctl start docker"); err != nil {
return fmt.Errorf("failed to start docker: %w", err)
}
// Wait for Docker to become ready (up to 30s); fail rather than continue silently
ready := false
for i := 0; i < 30; i++ {
if err := p.remoteCmd(ctx, "docker info > /dev/null 2>&1"); err == nil {
ready = true
break
}
time.Sleep(time.Second)
}
if !ready {
return fmt.Errorf("docker did not become ready within 30s")
}
// Find and start docker-compose stacks
findCmd := "find /opt -name 'docker-compose.yml' -o -name 'docker-compose.yaml' -o -name 'compose.yml' -o -name 'compose.yaml' 2>/dev/null | head -20"
output, err := p.remoteCmdOutput(ctx, findCmd)
if err != nil || strings.TrimSpace(output) == "" {
if p.Verbose {
fmt.Println(" No docker-compose files found")
}
return nil
}
composeFiles := strings.Split(strings.TrimSpace(output), "\n")
for _, file := range composeFiles {
if file == "" {
continue
}
// Get directory containing the compose file (path.Dir is safe even
// if the path unexpectedly has no slash)
dir := path.Dir(file)
if p.Verbose {
fmt.Printf(" Starting compose stack in %s\n", dir)
}
// Try docker compose (v2) first, fall back to docker-compose (v1);
// quote the path in case it contains spaces
startCmd := fmt.Sprintf("cd '%s' && (docker compose up -d 2>/dev/null || docker-compose up -d)", dir)
if err := p.remoteCmd(ctx, startCmd); err != nil {
if p.Verbose {
fmt.Printf(" Warning: failed to start stack in %s: %v\n", dir, err)
}
// Don't fail on individual stack failures
}
}
return nil
}
// runHealth performs health verification
func (p *Pipeline) runHealth(ctx context.Context) error {
checks := []struct {
name string
cmd string
require bool
}{
{"SSH accessible", "echo ok", true},
{"Docker running", "docker info > /dev/null 2>&1 && echo ok", true},
{"Network connectivity", "ping -c 1 8.8.8.8 > /dev/null 2>&1 && echo ok", false},
{"DNS resolution", "host google.com > /dev/null 2>&1 && echo ok", false},
}
var failures []string
for _, check := range checks {
output, err := p.remoteCmdOutput(ctx, check.cmd)
success := err == nil && strings.TrimSpace(output) == "ok"
status := "✓"
if !success {
status = "✗"
if check.require {
failures = append(failures, check.name)
}
}
if p.Verbose {
fmt.Printf(" %s %s\n", status, check.name)
}
}
if len(failures) > 0 {
return fmt.Errorf("required health checks failed: %v", failures)
}
return nil
}
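Because runHealth above is table-driven, host-specific probes are a one-line addition. A hypothetical optional probe appended to the checks slice could look like this (not part of the commit):

    // Hypothetical non-required probe: at least one container is running.
    {"Compose stacks up", "docker ps --format '{{.Names}}' | grep -q . && echo ok", false},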


@@ -0,0 +1,130 @@
package restore
import (
"context"
"fmt"
"time"
"recover-server/internal/backup"
"recover-server/internal/providers"
)
// Stage represents a restore stage
type Stage int
const (
StageSync Stage = iota + 1
StageEtc
StageSelectiveEtc
StageSSHKeys
StageServices
StageHealth
)
func (s Stage) String() string {
names := map[Stage]string{
StageSync: "Sync /root and /opt",
StageEtc: "Stage /etc backup",
StageSelectiveEtc: "Selective /etc restore",
StageSSHKeys: "Merge SSH keys",
StageServices: "Start services",
StageHealth: "Health verification",
}
return names[s]
}
// StageResult contains the result of a stage execution
type StageResult struct {
Stage Stage
Success bool
Message string
Duration time.Duration
Error error
}
// Pipeline orchestrates the restore process
type Pipeline struct {
VM *providers.VM
BackupSource backup.BackupSource
HostName string
SSHKeyPath string // Path to ephemeral private key
SSHUser string // Usually "root"
DryRun bool
Verbose bool
results []StageResult
}
// NewPipeline creates a new restore pipeline
func NewPipeline(vm *providers.VM, source backup.BackupSource, host, sshKeyPath string) *Pipeline {
return &Pipeline{
VM: vm,
BackupSource: source,
HostName: host,
SSHKeyPath: sshKeyPath,
SSHUser: "root",
results: make([]StageResult, 0),
}
}
// Run executes all stages
func (p *Pipeline) Run(ctx context.Context) error {
stages := []struct {
stage Stage
fn func(context.Context) error
}{
{StageSync, p.runSync},
{StageEtc, p.runEtcStaging},
{StageSelectiveEtc, p.runSelectiveEtc},
{StageSSHKeys, p.runSSHKeyMerge},
{StageServices, p.runServices},
{StageHealth, p.runHealth},
}
for _, s := range stages {
start := time.Now()
if p.Verbose {
fmt.Printf("\n=== Stage %d: %s ===\n", s.stage, s.stage)
}
if p.DryRun {
p.results = append(p.results, StageResult{
Stage: s.stage,
Success: true,
Message: "[DRY RUN] Would execute: " + s.stage.String(),
Duration: 0,
})
continue
}
err := s.fn(ctx)
result := StageResult{
Stage: s.stage,
Success: err == nil,
Duration: time.Since(start),
Error: err,
}
if err != nil {
result.Message = err.Error()
p.results = append(p.results, result)
return fmt.Errorf("stage %d (%s) failed: %w", s.stage, s.stage, err)
}
result.Message = "Completed successfully"
p.results = append(p.results, result)
}
return nil
}
// Results returns all stage results
func (p *Pipeline) Results() []StageResult {
return p.results
}
// sshTarget returns the SSH target string
func (p *Pipeline) sshTarget() string {
return fmt.Sprintf("%s@%s", p.SSHUser, p.VM.PublicIP)
}
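A minimal sketch of how the recover command might drive this pipeline; the wiring of vm, source, ctx, and the yes flag is hypothetical (the real CLI lives in cmd/recover-server):

    // Hypothetical caller; identifiers vm, source, ctx, yes are assumed.
    p := restore.NewPipeline(vm, source, "web01", keyPair.PrivateKeyPath)
    p.Verbose = true
    p.DryRun = !yes // dry-run by default; --yes enables execution

    if err := p.Run(ctx); err != nil {
        log.Fatalf("restore failed: %v", err)
    }
    for _, r := range p.Results() {
        fmt.Printf("%-22s ok=%t (%s)\n", r.Stage, r.Success, r.Duration)
    }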

internal/restore/ssh.go

@@ -0,0 +1,110 @@
package restore
import (
"context"
"crypto/ed25519"
"crypto/rand"
"encoding/pem"
"fmt"
"os"
"path/filepath"
"sort"
"strings"
"golang.org/x/crypto/ssh"
)
// SSHKeyPair holds an ephemeral SSH key pair
type SSHKeyPair struct {
PrivateKeyPath string
PublicKey string
}
// GenerateEphemeralKey creates a temporary ED25519 SSH key pair
func GenerateEphemeralKey() (*SSHKeyPair, error) {
// Generate ED25519 key pair
pubKey, privKey, err := ed25519.GenerateKey(rand.Reader)
if err != nil {
return nil, fmt.Errorf("failed to generate key: %w", err)
}
// Convert to SSH format
sshPubKey, err := ssh.NewPublicKey(pubKey)
if err != nil {
return nil, fmt.Errorf("failed to create SSH public key: %w", err)
}
// Marshal public key
pubKeyStr := strings.TrimSpace(string(ssh.MarshalAuthorizedKey(sshPubKey)))
// Create temp directory for key
tmpDir, err := os.MkdirTemp("", "recover-ssh-")
if err != nil {
return nil, fmt.Errorf("failed to create temp dir: %w", err)
}
// Write private key in OpenSSH format
privKeyPath := filepath.Join(tmpDir, "id_ed25519")
// Marshal private key to OpenSSH format
pemBlock, err := ssh.MarshalPrivateKey(privKey, "")
if err != nil {
return nil, fmt.Errorf("failed to marshal private key: %w", err)
}
privKeyPEM := pem.EncodeToMemory(pemBlock)
if err := os.WriteFile(privKeyPath, privKeyPEM, 0600); err != nil {
return nil, fmt.Errorf("failed to write private key: %w", err)
}
return &SSHKeyPair{
PrivateKeyPath: privKeyPath,
PublicKey: pubKeyStr,
}, nil
}
// Cleanup removes the ephemeral key files
func (k *SSHKeyPair) Cleanup() {
if k.PrivateKeyPath != "" {
os.RemoveAll(filepath.Dir(k.PrivateKeyPath))
}
}
// runSSHKeyMerge merges original authorized_keys with ephemeral key
func (p *Pipeline) runSSHKeyMerge(ctx context.Context) error {
// First, backup current authorized_keys
backupCmd := "cp /root/.ssh/authorized_keys /root/.ssh/authorized_keys.ephemeral 2>/dev/null || true"
p.remoteCmd(ctx, backupCmd)
// Check if we have original keys in the restored /root
checkCmd := "cat /root/.ssh/authorized_keys.original 2>/dev/null || cat /srv/restore/root/.ssh/authorized_keys 2>/dev/null || echo ''"
originalKeys, _ := p.remoteCmdOutput(ctx, checkCmd)
// Get current (ephemeral) keys
currentKeys, _ := p.remoteCmdOutput(ctx, "cat /root/.ssh/authorized_keys 2>/dev/null || echo ''")
// Merge keys (unique)
allKeys := make(map[string]bool)
for _, key := range strings.Split(currentKeys, "\n") {
key = strings.TrimSpace(key)
if key != "" && !strings.HasPrefix(key, "#") {
allKeys[key] = true
}
}
for _, key := range strings.Split(originalKeys, "\n") {
key = strings.TrimSpace(key)
if key != "" && !strings.HasPrefix(key, "#") {
allKeys[key] = true
}
}
// Write merged keys, sorted for deterministic output. Keys are embedded in a
// single-quoted shell string, which assumes they contain no single quotes
// (true for standard authorized_keys entries).
mergedKeys := make([]string, 0, len(allKeys))
for key := range allKeys {
mergedKeys = append(mergedKeys, key)
}
sort.Strings(mergedKeys)
mergeCmd := fmt.Sprintf("mkdir -p /root/.ssh && echo '%s' > /root/.ssh/authorized_keys && chmod 600 /root/.ssh/authorized_keys",
strings.Join(mergedKeys, "\n"))
return p.remoteCmd(ctx, mergeCmd)
}
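Caller-side sketch for the ephemeral key. The cloud-init injection shown is an assumption based on the "Cloud-init for SSH key injection" feature in the commit message; the actual user-data template is not in this diff:

    // Hypothetical usage; error handling assumes an enclosing function.
    keyPair, err := restore.GenerateEphemeralKey()
    if err != nil {
        return err
    }
    defer keyPair.Cleanup() // removes the temp dir holding the private key

    // The public key is injected into the new VM at boot, e.g. via
    // cloud-init user-data (assumed format):
    userData := fmt.Sprintf("#cloud-config\nssh_authorized_keys:\n  - %s\n", keyPair.PublicKey)

Once the VM is reachable with that key, keyPair.PrivateKeyPath is what gets handed to NewPipeline as sshKeyPath.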

internal/restore/stages.go

@@ -0,0 +1,109 @@
package restore
import (
"context"
"fmt"
"os/exec"
"path"
"strings"
)
// etcWhitelist lists the only /etc paths restored onto the new VM. Restoring
// /etc wholesale would clobber files the fresh cloud image relies on (fstab,
// network config, machine identity), so only these known-safe paths come back.
var etcWhitelist = []string{
"wireguard",
"letsencrypt",
"nginx",
"rsyslog-certs",
"systemd/system",
"docker",
"hostname",
"hosts",
"passwd",
"group",
"shadow",
"gshadow",
}
// runSync syncs /root and /opt from backup
func (p *Pipeline) runSync(ctx context.Context) error {
dirs := []string{"root", "opt"}
return p.BackupSource.SyncTo(ctx, p.HostName, p.sshTarget(), p.SSHKeyPath, dirs)
}
// runEtcStaging stages /etc to /srv/restore/etc
func (p *Pipeline) runEtcStaging(ctx context.Context) error {
// Create staging directory on target
if err := p.remoteCmd(ctx, "mkdir -p /srv/restore"); err != nil {
return fmt.Errorf("failed to create staging dir: %w", err)
}
// Sync /etc to staging
dirs := []string{"etc"}
return p.BackupSource.SyncTo(ctx, p.HostName, p.sshTarget(), p.SSHKeyPath, dirs)
}
// runSelectiveEtc copies only whitelisted items from staged /etc
func (p *Pipeline) runSelectiveEtc(ctx context.Context) error {
for _, item := range etcWhitelist {
src := fmt.Sprintf("/srv/restore/etc/%s", item)
dst := fmt.Sprintf("/etc/%s", item)
// Check if source exists
checkCmd := fmt.Sprintf("test -e %s && echo exists || echo missing", src)
output, err := p.remoteCmdOutput(ctx, checkCmd)
if err != nil || strings.TrimSpace(output) == "missing" {
if p.Verbose {
fmt.Printf(" Skipping %s (not in backup)\n", item)
}
continue
}
// Create the parent directory if needed (e.g. /etc/systemd for systemd/system)
if strings.Contains(item, "/") {
p.remoteCmd(ctx, fmt.Sprintf("mkdir -p %s", path.Dir(dst)))
}
// Copy with rsync into the parent directory: "rsync src dst" would nest an
// already-existing directory (/etc/systemd/system/system), while
// "rsync src parent/" merges into it and preserves ownership and permissions
copyCmd := fmt.Sprintf("rsync -a %s %s/", src, path.Dir(dst))
if err := p.remoteCmd(ctx, copyCmd); err != nil {
return fmt.Errorf("failed to restore %s: %w", item, err)
}
if p.Verbose {
fmt.Printf(" Restored %s\n", item)
}
}
return nil
}
// sshArgs builds the shared argument list for a non-interactive ssh call:
// ssh -i <key> -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
//     -o ConnectTimeout=10 root@<ip> '<cmd>'
func (p *Pipeline) sshArgs(cmd string) []string {
return []string{
"-i", p.SSHKeyPath,
"-o", "StrictHostKeyChecking=no",
"-o", "UserKnownHostsFile=/dev/null",
"-o", "ConnectTimeout=10",
p.sshTarget(),
cmd,
}
}
// remoteCmd runs a command on the target VM
func (p *Pipeline) remoteCmd(ctx context.Context, cmd string) error {
return exec.CommandContext(ctx, "ssh", p.sshArgs(cmd)...).Run()
}
// remoteCmdOutput runs a command on the target VM and returns its stdout
func (p *Pipeline) remoteCmdOutput(ctx context.Context, cmd string) (string, error) {
output, err := exec.CommandContext(ctx, "ssh", p.sshArgs(cmd)...).Output()
return string(output), err
}


@@ -0,0 +1,63 @@
package restore
import (
"context"
"fmt"
"strings"
)
// runServices starts restored services
func (p *Pipeline) runServices(ctx context.Context) error {
// Start WireGuard interfaces
if err := p.startWireGuard(ctx); err != nil {
// WireGuard is optional, log but don't fail
if p.Verbose {
fmt.Printf(" WireGuard: %v\n", err)
}
}
// Start Docker
if err := p.startDocker(ctx); err != nil {
return fmt.Errorf("failed to start Docker: %w", err)
}
return nil
}
// startWireGuard enables and starts WireGuard interfaces
func (p *Pipeline) startWireGuard(ctx context.Context) error {
// Check if WireGuard configs exist
checkCmd := "ls /etc/wireguard/*.conf 2>/dev/null | head -5"
output, err := p.remoteCmdOutput(ctx, checkCmd)
if err != nil || strings.TrimSpace(output) == "" {
return fmt.Errorf("no WireGuard configs found")
}
// Get interface names
configs := strings.Split(strings.TrimSpace(output), "\n")
for _, conf := range configs {
if conf == "" {
continue
}
// Extract interface name from path (e.g., /etc/wireguard/wg0.conf -> wg0)
parts := strings.Split(conf, "/")
filename := parts[len(parts)-1]
iface := strings.TrimSuffix(filename, ".conf")
if p.Verbose {
fmt.Printf(" Starting WireGuard interface: %s\n", iface)
}
// Enable and start
enableCmd := fmt.Sprintf("systemctl enable wg-quick@%s", iface)
startCmd := fmt.Sprintf("systemctl start wg-quick@%s", iface)
p.remoteCmd(ctx, enableCmd)
if err := p.remoteCmd(ctx, startCmd); err != nil {
return fmt.Errorf("failed to start %s: %w", iface, err)
}
}
return nil
}