Initial commit: Disaster recovery CLI tool
A Go-based CLI tool for recovering servers from backups to new cloud VMs.

Features:
- Multi-cloud support: Exoscale, Cloudscale, Hetzner Cloud
- Backup sources: Local filesystem, Hetzner Storage Box
- 6-stage restore pipeline with /etc whitelist protection
- DNS migration with safety checks and auto-rollback
- Dry-run by default, requires --yes to execute
- Cloud-init for SSH key injection

Packages:
- cmd/recover-server: CLI commands (recover, migrate-dns, list, cleanup)
- internal/providers: Cloud provider implementations
- internal/backup: Backup source implementations
- internal/restore: 6-stage restore pipeline
- internal/dns: Exoscale DNS management
- internal/ui: Prompts, progress, dry-run display
- internal/config: Environment and host configuration

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
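Roughly how these pieces fit together, as a minimal hypothetical wiring sketch (not part of this commit's diff): it assumes the caller has already generated an ephemeral key, created the VM with that key's public half injected via cloud-init, and opened a backup source for the host; only types added in this commit are used.

// Hypothetical usage sketch: how a caller might drive the restore pipeline
// added in this commit. The real CLI wiring lives in cmd/recover-server.
package example

import (
	"context"
	"fmt"

	"recover-server/internal/backup"
	"recover-server/internal/providers"
	"recover-server/internal/restore"
)

// recoverHost assumes the VM was created with key.PublicKey injected via
// cloud-init and that src points at this host's backup.
func recoverHost(ctx context.Context, vm *providers.VM, src backup.BackupSource, key *restore.SSHKeyPair, host string, yes bool) error {
	defer key.Cleanup() // remove the temporary private key when done

	p := restore.NewPipeline(vm, src, host, key.PrivateKeyPath)
	p.DryRun = !yes // dry-run by default; --yes enables execution
	p.Verbose = true

	if err := p.Run(ctx); err != nil {
		return err
	}
	for _, r := range p.Results() {
		fmt.Printf("%-25s ok=%v (%s)\n", r.Stage, r.Success, r.Duration)
	}
	return nil
}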

internal/restore/docker.go (new file, 102 lines)
@@ -0,0 +1,102 @@
package restore

import (
	"context"
	"fmt"
	"strings"
	"time"
)

// startDocker ensures Docker is running and starts compose stacks
func (p *Pipeline) startDocker(ctx context.Context) error {
	// Ensure Docker is enabled and running
	if err := p.remoteCmd(ctx, "systemctl enable docker"); err != nil {
		return fmt.Errorf("failed to enable docker: %w", err)
	}

	if err := p.remoteCmd(ctx, "systemctl start docker"); err != nil {
		return fmt.Errorf("failed to start docker: %w", err)
	}

	// Wait for Docker to be ready
	for i := 0; i < 30; i++ {
		if err := p.remoteCmd(ctx, "docker info > /dev/null 2>&1"); err == nil {
			break
		}
		time.Sleep(time.Second)
	}

	// Find and start docker-compose stacks
	findCmd := "find /opt -name 'docker-compose.yml' -o -name 'docker-compose.yaml' -o -name 'compose.yml' -o -name 'compose.yaml' 2>/dev/null | head -20"
	output, err := p.remoteCmdOutput(ctx, findCmd)
	if err != nil || strings.TrimSpace(output) == "" {
		if p.Verbose {
			fmt.Println(" No docker-compose files found")
		}
		return nil
	}

	composeFiles := strings.Split(strings.TrimSpace(output), "\n")
	for _, file := range composeFiles {
		if file == "" {
			continue
		}

		// Get directory containing compose file
		dir := file[:strings.LastIndex(file, "/")]

		if p.Verbose {
			fmt.Printf(" Starting compose stack in %s\n", dir)
		}

		// Try docker compose (v2) first, fall back to docker-compose (v1)
		startCmd := fmt.Sprintf("cd %s && (docker compose up -d 2>/dev/null || docker-compose up -d)", dir)
		if err := p.remoteCmd(ctx, startCmd); err != nil {
			if p.Verbose {
				fmt.Printf(" Warning: failed to start stack in %s: %v\n", dir, err)
			}
			// Don't fail on individual stack failures
		}
	}

	return nil
}

// runHealth performs health verification
func (p *Pipeline) runHealth(ctx context.Context) error {
	checks := []struct {
		name    string
		cmd     string
		require bool
	}{
		{"SSH accessible", "echo ok", true},
		{"Docker running", "docker info > /dev/null 2>&1 && echo ok", true},
		{"Network connectivity", "ping -c 1 8.8.8.8 > /dev/null 2>&1 && echo ok", false},
		{"DNS resolution", "host google.com > /dev/null 2>&1 && echo ok", false},
	}

	var failures []string

	for _, check := range checks {
		output, err := p.remoteCmdOutput(ctx, check.cmd)
		success := err == nil && strings.TrimSpace(output) == "ok"

		status := "✓"
		if !success {
			status = "✗"
			if check.require {
				failures = append(failures, check.name)
			}
		}

		if p.Verbose {
			fmt.Printf(" %s %s\n", status, check.name)
		}
	}

	if len(failures) > 0 {
		return fmt.Errorf("required health checks failed: %v", failures)
	}

	return nil
}

internal/restore/pipeline.go (new file, 130 lines)
@@ -0,0 +1,130 @@
package restore

import (
	"context"
	"fmt"
	"time"

	"recover-server/internal/backup"
	"recover-server/internal/providers"
)

// Stage represents a restore stage
type Stage int

const (
	StageSync Stage = iota + 1
	StageEtc
	StageSelectiveEtc
	StageSSHKeys
	StageServices
	StageHealth
)

func (s Stage) String() string {
	names := map[Stage]string{
		StageSync:         "Sync /root and /opt",
		StageEtc:          "Stage /etc backup",
		StageSelectiveEtc: "Selective /etc restore",
		StageSSHKeys:      "Merge SSH keys",
		StageServices:     "Start services",
		StageHealth:       "Health verification",
	}
	return names[s]
}

// StageResult contains the result of a stage execution
type StageResult struct {
	Stage    Stage
	Success  bool
	Message  string
	Duration time.Duration
	Error    error
}

// Pipeline orchestrates the restore process
type Pipeline struct {
	VM           *providers.VM
	BackupSource backup.BackupSource
	HostName     string
	SSHKeyPath   string // Path to ephemeral private key
	SSHUser      string // Usually "root"
	DryRun       bool
	Verbose      bool

	results []StageResult
}

// NewPipeline creates a new restore pipeline
func NewPipeline(vm *providers.VM, source backup.BackupSource, host, sshKeyPath string) *Pipeline {
	return &Pipeline{
		VM:           vm,
		BackupSource: source,
		HostName:     host,
		SSHKeyPath:   sshKeyPath,
		SSHUser:      "root",
		results:      make([]StageResult, 0),
	}
}

// Run executes all stages
func (p *Pipeline) Run(ctx context.Context) error {
	stages := []struct {
		stage Stage
		fn    func(context.Context) error
	}{
		{StageSync, p.runSync},
		{StageEtc, p.runEtcStaging},
		{StageSelectiveEtc, p.runSelectiveEtc},
		{StageSSHKeys, p.runSSHKeyMerge},
		{StageServices, p.runServices},
		{StageHealth, p.runHealth},
	}

	for _, s := range stages {
		start := time.Now()

		if p.Verbose {
			fmt.Printf("\n=== Stage %d: %s ===\n", s.stage, s.stage)
		}

		if p.DryRun {
			p.results = append(p.results, StageResult{
				Stage:    s.stage,
				Success:  true,
				Message:  "[DRY RUN] Would execute: " + s.stage.String(),
				Duration: 0,
			})
			continue
		}

		err := s.fn(ctx)
		result := StageResult{
			Stage:    s.stage,
			Success:  err == nil,
			Duration: time.Since(start),
			Error:    err,
		}

		if err != nil {
			result.Message = err.Error()
			p.results = append(p.results, result)
			return fmt.Errorf("stage %d (%s) failed: %w", s.stage, s.stage, err)
		}

		result.Message = "Completed successfully"
		p.results = append(p.results, result)
	}

	return nil
}

// Results returns all stage results
func (p *Pipeline) Results() []StageResult {
	return p.results
}

// sshTarget returns the SSH target string
func (p *Pipeline) sshTarget() string {
	return fmt.Sprintf("%s@%s", p.SSHUser, p.VM.PublicIP)
}

internal/restore/ssh.go (new file, 110 lines)
@@ -0,0 +1,110 @@
package restore

import (
	"context"
	"crypto/ed25519"
	"crypto/rand"
	"encoding/pem"
	"fmt"
	"os"
	"path/filepath"
	"strings"

	"golang.org/x/crypto/ssh"
)

// SSHKeyPair holds an ephemeral SSH key pair
type SSHKeyPair struct {
	PrivateKeyPath string
	PublicKey      string
}

// GenerateEphemeralKey creates a temporary ED25519 SSH key pair
func GenerateEphemeralKey() (*SSHKeyPair, error) {
	// Generate ED25519 key pair
	pubKey, privKey, err := ed25519.GenerateKey(rand.Reader)
	if err != nil {
		return nil, fmt.Errorf("failed to generate key: %w", err)
	}

	// Convert to SSH format
	sshPubKey, err := ssh.NewPublicKey(pubKey)
	if err != nil {
		return nil, fmt.Errorf("failed to create SSH public key: %w", err)
	}

	// Marshal public key
	pubKeyStr := strings.TrimSpace(string(ssh.MarshalAuthorizedKey(sshPubKey)))

	// Create temp directory for key
	tmpDir, err := os.MkdirTemp("", "recover-ssh-")
	if err != nil {
		return nil, fmt.Errorf("failed to create temp dir: %w", err)
	}

	// Write private key in OpenSSH format
	privKeyPath := filepath.Join(tmpDir, "id_ed25519")

	// Marshal private key to OpenSSH format
	pemBlock, err := ssh.MarshalPrivateKey(privKey, "")
	if err != nil {
		return nil, fmt.Errorf("failed to marshal private key: %w", err)
	}

	privKeyPEM := pem.EncodeToMemory(pemBlock)
	if err := os.WriteFile(privKeyPath, privKeyPEM, 0600); err != nil {
		return nil, fmt.Errorf("failed to write private key: %w", err)
	}

	return &SSHKeyPair{
		PrivateKeyPath: privKeyPath,
		PublicKey:      pubKeyStr,
	}, nil
}

// Cleanup removes the ephemeral key files
func (k *SSHKeyPair) Cleanup() {
	if k.PrivateKeyPath != "" {
		os.RemoveAll(filepath.Dir(k.PrivateKeyPath))
	}
}

// runSSHKeyMerge merges original authorized_keys with ephemeral key
func (p *Pipeline) runSSHKeyMerge(ctx context.Context) error {
	// First, backup current authorized_keys
	backupCmd := "cp /root/.ssh/authorized_keys /root/.ssh/authorized_keys.ephemeral 2>/dev/null || true"
	p.remoteCmd(ctx, backupCmd)

	// Check if we have original keys in the restored /root
	checkCmd := "cat /root/.ssh/authorized_keys.original 2>/dev/null || cat /srv/restore/root/.ssh/authorized_keys 2>/dev/null || echo ''"
	originalKeys, _ := p.remoteCmdOutput(ctx, checkCmd)

	// Get current (ephemeral) keys
	currentKeys, _ := p.remoteCmdOutput(ctx, "cat /root/.ssh/authorized_keys 2>/dev/null || echo ''")

	// Merge keys (unique)
	allKeys := make(map[string]bool)
	for _, key := range strings.Split(currentKeys, "\n") {
		key = strings.TrimSpace(key)
		if key != "" && !strings.HasPrefix(key, "#") {
			allKeys[key] = true
		}
	}
	for _, key := range strings.Split(originalKeys, "\n") {
		key = strings.TrimSpace(key)
		if key != "" && !strings.HasPrefix(key, "#") {
			allKeys[key] = true
		}
	}

	// Write merged keys
	var mergedKeys []string
	for key := range allKeys {
		mergedKeys = append(mergedKeys, key)
	}

	mergeCmd := fmt.Sprintf("mkdir -p /root/.ssh && echo '%s' > /root/.ssh/authorized_keys && chmod 600 /root/.ssh/authorized_keys",
		strings.Join(mergedKeys, "\n"))

	return p.remoteCmd(ctx, mergeCmd)
}
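
A minimal sanity check for the key helper above might look like the following (a hypothetical test, not part of this commit; it relies only on the SSHKeyPair fields and the authorized_keys format produced by ssh.MarshalAuthorizedKey):

package restore

import (
	"os"
	"strings"
	"testing"
)

func TestGenerateEphemeralKey(t *testing.T) {
	key, err := GenerateEphemeralKey()
	if err != nil {
		t.Fatal(err)
	}
	defer key.Cleanup()

	// The public key should be in authorized_keys format for ed25519.
	if !strings.HasPrefix(key.PublicKey, "ssh-ed25519 ") {
		t.Errorf("unexpected public key format: %q", key.PublicKey)
	}
	// The private key file should exist until Cleanup is called.
	if _, err := os.Stat(key.PrivateKeyPath); err != nil {
		t.Errorf("private key not written: %v", err)
	}
}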

internal/restore/stages.go (new file, 109 lines)
@@ -0,0 +1,109 @@
package restore

import (
	"context"
	"fmt"
	"os/exec"
	"strings"
)

// /etc whitelist - only these are restored
var etcWhitelist = []string{
	"wireguard",
	"letsencrypt",
	"nginx",
	"rsyslog-certs",
	"systemd/system",
	"docker",
	"hostname",
	"hosts",
	"passwd",
	"group",
	"shadow",
	"gshadow",
}

// runSync syncs /root and /opt from backup
func (p *Pipeline) runSync(ctx context.Context) error {
	dirs := []string{"root", "opt"}
	return p.BackupSource.SyncTo(ctx, p.HostName, p.sshTarget(), p.SSHKeyPath, dirs)
}

// runEtcStaging stages /etc to /srv/restore/etc
func (p *Pipeline) runEtcStaging(ctx context.Context) error {
	// Create staging directory on target
	if err := p.remoteCmd(ctx, "mkdir -p /srv/restore"); err != nil {
		return fmt.Errorf("failed to create staging dir: %w", err)
	}

	// Sync /etc to staging
	dirs := []string{"etc"}
	return p.BackupSource.SyncTo(ctx, p.HostName, p.sshTarget(), p.SSHKeyPath, dirs)
}

// runSelectiveEtc copies only whitelisted items from staged /etc
func (p *Pipeline) runSelectiveEtc(ctx context.Context) error {
	for _, item := range etcWhitelist {
		src := fmt.Sprintf("/srv/restore/etc/%s", item)

		// Check if source exists
		checkCmd := fmt.Sprintf("test -e %s && echo exists || echo missing", src)
		output, err := p.remoteCmdOutput(ctx, checkCmd)
		if err != nil || strings.TrimSpace(output) == "missing" {
			if p.Verbose {
				fmt.Printf(" Skipping %s (not in backup)\n", item)
			}
			continue
		}

		// Create parent directory if needed
		parentDir := "/etc/"
		if idx := strings.LastIndex(item, "/"); idx >= 0 {
			parentDir = fmt.Sprintf("/etc/%s/", item[:idx])
			p.remoteCmd(ctx, fmt.Sprintf("mkdir -p %s", parentDir))
		}

		// Copy with rsync for proper permissions. The destination is the
		// parent directory so that an already existing directory such as
		// /etc/systemd/system is merged rather than nested one level deeper.
		copyCmd := fmt.Sprintf("rsync -av %s %s", src, parentDir)
		if err := p.remoteCmd(ctx, copyCmd); err != nil {
			return fmt.Errorf("failed to restore %s: %w", item, err)
		}

		if p.Verbose {
			fmt.Printf(" Restored %s\n", item)
		}
	}

	return nil
}

// remoteCmd runs a command on the target VM
func (p *Pipeline) remoteCmd(ctx context.Context, cmd string) error {
	sshArgs := []string{
		"-i", p.SSHKeyPath,
		"-o", "StrictHostKeyChecking=no",
		"-o", "UserKnownHostsFile=/dev/null",
		"-o", "ConnectTimeout=10",
		p.sshTarget(),
		cmd,
	}

	sshCmd := exec.CommandContext(ctx, "ssh", sshArgs...)
	return sshCmd.Run()
}

// remoteCmdOutput runs a command and returns output
func (p *Pipeline) remoteCmdOutput(ctx context.Context, cmd string) (string, error) {
	sshArgs := []string{
		"-i", p.SSHKeyPath,
		"-o", "StrictHostKeyChecking=no",
		"-o", "UserKnownHostsFile=/dev/null",
		"-o", "ConnectTimeout=10",
		p.sshTarget(),
		cmd,
	}

	sshCmd := exec.CommandContext(ctx, "ssh", sshArgs...)
	output, err := sshCmd.Output()
	return string(output), err
}

internal/restore/wireguard.go (new file, 63 lines)
@@ -0,0 +1,63 @@
package restore

import (
	"context"
	"fmt"
	"strings"
)

// runServices starts restored services
func (p *Pipeline) runServices(ctx context.Context) error {
	// Start WireGuard interfaces
	if err := p.startWireGuard(ctx); err != nil {
		// WireGuard is optional, log but don't fail
		if p.Verbose {
			fmt.Printf(" WireGuard: %v\n", err)
		}
	}

	// Start Docker
	if err := p.startDocker(ctx); err != nil {
		return fmt.Errorf("failed to start Docker: %w", err)
	}

	return nil
}

// startWireGuard enables and starts WireGuard interfaces
func (p *Pipeline) startWireGuard(ctx context.Context) error {
	// Check if WireGuard configs exist
	checkCmd := "ls /etc/wireguard/*.conf 2>/dev/null | head -5"
	output, err := p.remoteCmdOutput(ctx, checkCmd)
	if err != nil || strings.TrimSpace(output) == "" {
		return fmt.Errorf("no WireGuard configs found")
	}

	// Get interface names
	configs := strings.Split(strings.TrimSpace(output), "\n")
	for _, conf := range configs {
		if conf == "" {
			continue
		}

		// Extract interface name from path (e.g., /etc/wireguard/wg0.conf -> wg0)
		parts := strings.Split(conf, "/")
		filename := parts[len(parts)-1]
		iface := strings.TrimSuffix(filename, ".conf")

		if p.Verbose {
			fmt.Printf(" Starting WireGuard interface: %s\n", iface)
		}

		// Enable and start
		enableCmd := fmt.Sprintf("systemctl enable wg-quick@%s", iface)
		startCmd := fmt.Sprintf("systemctl start wg-quick@%s", iface)

		p.remoteCmd(ctx, enableCmd)
		if err := p.remoteCmd(ctx, startCmd); err != nil {
			return fmt.Errorf("failed to start %s: %w", iface, err)
		}
	}

	return nil
}