# Back up images, container configuration and volumes on the source server.
# Each remote command is a double-quoted string: $BACKUP_DIR is expanded
# locally before the command is sent, while \$-escaped expansions are left
# for the remote shell to evaluate.
# Abort early if either setting is missing; an empty BACKUP_DIR would make the
# remote commands write their archives into the filesystem root.
: "${SOURCE_SERVER:?SOURCE_SERVER must be set}"
: "${BACKUP_DIR:?BACKUP_DIR must be set}"

# Back up images: one archive per tagged image, named after the image
# reference with '/' and ':' replaced by '_'. Untagged (<none>) entries are skipped.
ssh "$SOURCE_SERVER" "
  docker images --format '{{.Repository}}:{{.Tag}}' | grep -v '<none>' |
    while IFS= read -r image; do
      docker save -o \"$BACKUP_DIR/\$(printf '%s' \"\$image\" | tr '/:' '_').tar\" \"\$image\"
    done
"
# Back up container configuration: the full `docker inspect` output of every
# container (running or stopped) is written to <name>_config.json.
ssh "$SOURCE_SERVER" "
  docker ps -a --format '{{.Names}}' |
    while IFS= read -r container; do
      docker inspect \"\$container\" > \"$BACKUP_DIR/\${container}_config.json\"
    done
"
# Back up data volumes: each volume is mounted into a throwaway alpine
# container and archived. The archive carries a volume_ prefix so it cannot
# collide with an image archive of the same name in the same directory and so
# a restore step can tell the two kinds of .tar file apart.
ssh "$SOURCE_SERVER" "
  docker volume ls -q |
    while IFS= read -r volume; do
      docker run --rm -v \"\$volume:/volume\" -v \"$BACKUP_DIR:/backup\" alpine tar cvf \"/backup/volume_\${volume}.tar\" /volume
    done
"
# Install Prometheus: detached container, port 9090 published, configuration
# file bind-mounted from the host.
docker run \
  --detach \
  --name prometheus \
  --publish 9090:9090 \
  --volume /path/to/prometheus.yml:/etc/prometheus/prometheus.yml \
  prom/prometheus
# Install Grafana: detached container, port 3000 published.
docker run \
  --detach \
  --name grafana \
  --publish 3000:3000 \
  grafana/grafana
# Manage logs with the ELK Stack.
# The three services join a user-defined bridge network so they can reach each
# other by container name; this replaces the legacy `--link` flag. The network
# is created only if it does not exist yet.
docker network inspect elk >/dev/null 2>&1 || docker network create elk
# Install Elasticsearch (single-node discovery, port 9200 published).
docker run -d --name elasticsearch --network elk -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.10.1
# Install Logstash with the pipeline configuration bind-mounted from the host.
docker run -d --name logstash --network elk -v /path/to/logstash.conf:/usr/share/logstash/pipeline/logstash.conf logstash:7.10.1
# Install Kibana (port 5601 published).
docker run -d --name kibana --network elk -p 5601:5601 kibana:7.10.1
# Configure the Docker log driver: send this container's logs via syslog over TCP.
docker run --log-driver=syslog --log-opt syslog-address=tcp://logstash-host:5000 my_image
复制代码
5. 定期测试恢复流程
定期测试恢复流程，确保在发生灾难时能够快速恢复业务：
#!/bin/bash
# Disaster-recovery test script.
# Downloads the "latest" backup set from the backup server into a scratch
# directory, loads the image archives, restores the volume archives and then
# creates and starts one container per saved *_config.json file.
# Requires: scp, docker, jq.

# Backup server settings
BACKUP_SERVER="user@backup-server"
BACKUP_DIR="/path/to/backups"

# Create the test environment
mkdir -p /tmp/recovery_test
cd /tmp/recovery_test || { echo "error: cannot enter /tmp/recovery_test" >&2; exit 1; }

# Remove the copy left by a previous run so that only the fresh download is
# tested (scp would otherwise merge into the existing directory).
rm -rf -- ./latest

# Download the latest backup from the backup server
scp -r "$BACKUP_SERVER:$BACKUP_DIR/latest" . || { echo "error: download of latest backup failed" >&2; exit 1; }

# Restore container images
for archive in latest/*.tar; do
  # An unmatched glob is left as the literal pattern; skip it.
  [[ -e "$archive" ]] || continue
  # volume_*.tar files are volume archives, not images; the next loop handles them.
  case "${archive##*/}" in
    volume_*) continue ;;
  esac
  docker load -i "$archive"
done

# Restore data volumes
for archive in latest/volume_*.tar; do
  [[ -e "$archive" ]] || continue
  # Derive the volume name: strip the directory, ".tar" and the "volume_" prefix.
  volume=${archive##*/}
  volume=${volume%.tar}
  volume=${volume#volume_}
  docker volume create "$volume"
  # The working directory is mounted at /backup; the archive is unpacked
  # relative to / inside the container, with the volume mounted at /volume.
  docker run --rm -v "$volume:/volume" -v "$PWD:/backup" alpine tar xvf "/backup/$archive" -C /
done

# Start containers
for config in latest/*_config.json; do
  [[ -e "$config" ]] || continue
  container=${config##*/}
  container=${container%_config.json}
  # Image reference recorded in the saved `docker inspect` output.
  image=$(jq -r '.[0].Config.Image' "$config")
  if [[ -z "$image" || "$image" == "null" ]]; then
    echo "warning: no image found in $config, skipping $container" >&2
    continue
  fi
  # Start only what was just created, so a failed create cannot be hidden by
  # starting an unrelated pre-existing container with the same name.
  if docker create --name "$container" "$image"; then
    docker start "$container"
  else
    echo "warning: could not create container $container" >&2
  fi
done