redis集群架构方案

Posted by zengchengjie on Friday, January 9, 2026

Redis高可用与集群方案完全指南

一、Redis高可用架构演进全景

演进路径:
单节点Redis → 主从复制 → 哨兵模式 → Redis Cluster → Proxy方案

二、Redis主从复制(Master-Slave Replication)

2.1 复制原理与架构

工作原理:
Master: 写操作 → 内存变更 → 命令传播/增量同步 → Slave
Slave: 连接Master → 全量/增量同步 → 执行命令 → 保持数据一致

2.2 主从复制配置实战

2.2.1 主节点配置

# redis-master.conf
#
# NOTE: redis.conf only treats a line as a comment when the line itself
# starts with '#'. Text placed after a directive on the same line is parsed
# as extra arguments and the server rejects the file at startup, so every
# comment below sits on its own line.

# --- Basic settings ---
port 6379
bind 0.0.0.0
daemonize yes
pidfile /var/run/redis_6379.pid
logfile "/var/log/redis/redis-6379.log"
dir /var/lib/redis/6379

# --- Memory ---
maxmemory 2gb
maxmemory-policy allkeys-lru

# --- Persistence (RDB snapshots) ---
# Snapshot when at least 1 key changed within 900 seconds
save 900 1
# Snapshot when at least 10 keys changed within 300 seconds
save 300 10
# Snapshot when at least 10000 keys changed within 60 seconds
save 60 10000
rdbcompression yes
dbfilename dump-6379.rdb

# --- Replication ---
# Size of the replication backlog buffer
repl-backlog-size 256mb
# Backlog time-to-live, in seconds
repl-backlog-ttl 3600
# Replication timeout, in seconds
repl-timeout 60
# "no" leaves TCP_NODELAY enabled on the replication link
repl-disable-tcp-nodelay no

# --- Security ---
requirepass MasterRedisPass123
# Password used for master/replica authentication
masterauth MasterRedisPass123

# --- Monitoring ---
# Slow-query threshold, in microseconds
slowlog-log-slower-than 10000
# Maximum number of entries kept in the slow log
slowlog-max-len 128

2.2.2 从节点配置

# redis-slave-6380.conf
#
# NOTE: redis.conf only treats a line as a comment when the line itself
# starts with '#'. Text placed after a directive on the same line is parsed
# as extra arguments and the server rejects the file at startup, so every
# comment below sits on its own line.
port 6380
bind 0.0.0.0
daemonize yes
pidfile /var/run/redis_6380.pid
logfile "/var/log/redis/redis-6380.log"
dir /var/lib/redis/6380

# --- Replication ---
slaveof 192.168.1.100 6379
# Password of the master this replica authenticates against
masterauth MasterRedisPass123

# Replica is read-only (this is the default)
slave-read-only yes

# --- Replication timing ---
# PING interval on the replication link, in seconds.
# NOTE(review): current Redis documentation describes this as the interval
# at which a master pings its replicas, so on this node it would only
# matter after a promotion - confirm for the Redis version in use.
repl-ping-slave-period 10
# Replication timeout, in seconds
repl-timeout 60
# The two settings below are write guards that apply while a node is acting
# as a master: refuse writes unless at least 1 replica is connected with a
# lag of at most 10 seconds.
min-slaves-to-write 1
min-slaves-max-lag 10

# --- Persistence tuning (a replica may run without local persistence) ---
# Disable RDB snapshots
save ""
# Disable AOF
appendonly no

# --- Security ---
requirepass SlaveRedisPass123

2.2.3 手动配置主从

# 启动Redis实例
redis-server /path/to/redis-master.conf
redis-server /path/to/redis-slave-6380.conf

# 查看主节点信息
redis-cli -h 192.168.1.100 -p 6379 -a MasterRedisPass123
> INFO replication
# 输出:
# role:master
# connected_slaves:1
# slave0:ip=192.168.1.101,port=6380,state=online,offset=...

# 动态配置主从(无需重启)
# 在从节点执行:
redis-cli -h 192.168.1.101 -p 6380 -a SlaveRedisPass123
> SLAVEOF 192.168.1.100 6379  # 设置主节点
> SLAVEOF NO ONE              # 取消复制,变为主节点

# 查看复制状态
> INFO replication
# 关键指标:
# master_link_status:up      # 连接状态
# master_last_io_seconds_ago:1  # 最后通信时间
# slave_repl_offset:12345    # 复制偏移量
# master_repl_offset:12345   # 主库偏移量

2.3 复制过程详解

2.3.1 全量同步流程

# 当从节点第一次连接或复制断开太久时触发
1. 从节点发送PSYNC命令请求同步
2. 主节点执行BGSAVE生成RDB文件
3. 主节点将RDB发送给从节点
4. 从节点清空旧数据,加载RDB
5. 主节点将复制缓冲区中的命令发送给从节点
6. 从节点执行这些命令,达到同步状态

2.3.2 增量同步流程

# 当从节点短暂断开后重新连接时触发
1. 从节点发送PSYNC命令,带上复制偏移量
2. 主节点检查复制积压缓冲区
3. 如果偏移量在缓冲区范围内,发送增量数据
4. 从节点执行增量命令,达到同步状态

2.4 主从复制监控脚本

#!/bin/bash
# monitor_redis_replication.sh
#
# Connection and alerting settings for the replication monitor.
# Every value can be overridden from the environment; the defaults below
# are the example values used throughout this document. In particular the
# passwords should be supplied via the environment instead of being
# committed inside the script.

MASTER_HOST="${MASTER_HOST:-192.168.1.100}"
MASTER_PORT="${MASTER_PORT:-6379}"
MASTER_PASS="${MASTER_PASS:-MasterRedisPass123}"
SLAVE_HOST="${SLAVE_HOST:-192.168.1.101}"
SLAVE_PORT="${SLAVE_PORT:-6380}"
SLAVE_PASS="${SLAVE_PASS:-SlaveRedisPass123}"
ALERT_EMAIL="${ALERT_EMAIL:-devops@company.com}"

# ANSI colour escape sequences (interpreted later by `echo -e`)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Print the value of field $2 from a redis-cli INFO reply passed as $1.
# INFO lines have the form "name:value" and end in CRLF, so carriage
# returns are stripped and the field name is matched exactly.
_replication_info_field() {
    printf '%s\n' "$1" | tr -d '\r' | awk -F: -v key="$2" '$1 == key { print $2; exit }'
}

#######################################
# Check the health of the master/replica pair and alert on problems.
# Globals (read): MASTER_HOST, MASTER_PORT, MASTER_PASS, SLAVE_HOST,
#                 SLAVE_PORT, SLAVE_PASS, RED, GREEN, YELLOW, NC
# Calls:     send_alert (defined elsewhere in this script)
# Outputs:   human-readable report on stdout
# Returns:   0 when both nodes answered and the replication link is up;
#            1 when a node cannot be queried or the link is down
#######################################
check_replication_status() {
    local master_info slave_info
    local role connected_slaves master_offset
    local slave_role link_status slave_offset last_io read_only offset_diff

    echo -e "${YELLOW}=== Redis Replication Status Check ===${NC}"

    # --- Master ---
    echo -e "\n${YELLOW}[Master: ${MASTER_HOST}:${MASTER_PORT}]${NC}"
    # A reply without a "role" field (for example an authentication error,
    # which redis-cli reports with exit status 0) is treated like a
    # connection failure instead of being parsed as valid data.
    if ! master_info=$(redis-cli -h "$MASTER_HOST" -p "$MASTER_PORT" -a "$MASTER_PASS" INFO replication 2>/dev/null) \
        || [[ -z $(_replication_info_field "$master_info" role) ]]; then
        echo -e "${RED}✗ Cannot connect to master${NC}"
        send_alert "Redis Master Unreachable" "无法连接主节点 ${MASTER_HOST}:${MASTER_PORT}"
        return 1
    fi

    role=$(_replication_info_field "$master_info" role)
    connected_slaves=$(_replication_info_field "$master_info" connected_slaves)
    master_offset=$(_replication_info_field "$master_info" master_repl_offset)

    echo "Role: $role"
    echo "Connected Slaves: $connected_slaves"
    echo "Replication Offset: $master_offset"

    # --- Replica ---
    echo -e "\n${YELLOW}[Slave: ${SLAVE_HOST}:${SLAVE_PORT}]${NC}"
    if ! slave_info=$(redis-cli -h "$SLAVE_HOST" -p "$SLAVE_PORT" -a "$SLAVE_PASS" INFO replication 2>/dev/null) \
        || [[ -z $(_replication_info_field "$slave_info" role) ]]; then
        echo -e "${RED}✗ Cannot connect to slave${NC}"
        send_alert "Redis Slave Unreachable" "无法连接从节点 ${SLAVE_HOST}:${SLAVE_PORT}"
        return 1
    fi

    slave_role=$(_replication_info_field "$slave_info" role)
    link_status=$(_replication_info_field "$slave_info" master_link_status)
    slave_offset=$(_replication_info_field "$slave_info" slave_repl_offset)
    last_io=$(_replication_info_field "$slave_info" master_last_io_seconds_ago)
    read_only=$(_replication_info_field "$slave_info" slave_read_only)

    echo "Role: $slave_role"
    echo "Master Link Status: $link_status"
    echo "Slave Offset: $slave_offset"
    echo "Last IO Seconds Ago: $last_io"

    # --- Link state ---
    if [[ $link_status != "up" ]]; then
        echo -e "${RED}✗ Replication link is down${NC}"
        send_alert "Redis Replication Down" "主从复制连接中断"
        return 1
    fi

    # --- Replication lag (difference of the two offsets, in bytes) ---
    # Only do arithmetic on values that are really numeric; an empty field
    # would otherwise abort the function with a shell syntax error.
    if [[ $master_offset =~ ^[0-9]+$ && $slave_offset =~ ^[0-9]+$ ]]; then
        offset_diff=$((master_offset - slave_offset))
        if ((offset_diff > 1000)); then
            echo -e "${YELLOW}⚠ Replication delay: $offset_diff bytes${NC}"
            if ((offset_diff > 100000)); then
                send_alert "Redis Replication Delay" "复制延迟过高: $offset_diff bytes"
            fi
        else
            echo -e "${GREEN}✓ Replication is healthy${NC}"
        fi
    else
        echo -e "${YELLOW}⚠ Replication offsets unavailable, lag check skipped${NC}"
    fi

    # --- Replica must not accept writes ---
    # Read the slave_read_only flag from the INFO reply instead of issuing
    # a real SET: a write probe would leave a stray key behind on any
    # replica that turns out to be writable.
    echo -e "\n${YELLOW}Checking slave write permission...${NC}"
    if [[ $read_only == "1" ]]; then
        echo -e "${GREEN}✓ Slave is read-only (correct)${NC}"
    else
        echo -e "${RED}✗ Slave is writable (security risk!)${NC}"
        send_alert "Redis Security Alert" "从节点可写,存在安全风险"
    fi

    return 0
}

# 自动修复复制问题
#######################################
# Try to repair replication by re-pointing the replica at the master.
# Globals (read):    SLAVE_HOST, SLAVE_PORT, SLAVE_PASS, MASTER_HOST,
#                    MASTER_PORT, RED, GREEN, YELLOW, NC
# Globals (written): SLAVE_INFO, MASTER_LINK_STATUS
# Returns: 0 when the replica reports master_link_status "up" after the
#          attempt, 1 otherwise
#######################################
auto_fix_replication() {
    # Connection arguments shared by both redis-cli invocations
    local -a replica_conn=(-h $SLAVE_HOST -p $SLAVE_PORT -a $SLAVE_PASS)

    echo -e "\n${YELLOW}Attempting to fix replication issues...${NC}"

    # Step 1: issue SLAVEOF again so the replica reconnects to the master
    redis-cli "${replica_conn[@]}" SLAVEOF $MASTER_HOST $MASTER_PORT

    # Give the replica a moment to start synchronising
    sleep 5

    # Step 2: read the link state back from the replica
    SLAVE_INFO=$(redis-cli "${replica_conn[@]}" INFO replication 2>/dev/null)
    MASTER_LINK_STATUS=$(printf '%s\n' "$SLAVE_INFO" | tr -d '\r' | grep "master_link_status:" | cut -d: -f2)

    if [ "$MASTER_LINK_STATUS" != "up" ]; then
        echo -e "${RED}✗ Failed to fix replication${NC}"
        return 1
    fi

    echo -e "${GREEN}✓ Replication fixed successfully${NC}"
    return 0
}

# Escape a string so it can be embedded inside a JSON string literal.
_alert_json_escape() {
    local s=$1
    s=${s//\\/\\\\}
    s=${s//\"/\\\"}
    s=${s//$'\n'/\\n}
    s=${s//$'\r'/}
    s=${s//$'\t'/\\t}
    printf '%s' "$s"
}

#######################################
# Send an alert by e-mail and to the WeCom (企业微信) webhook.
# Globals (read): ALERT_EMAIL
# Arguments: $1 - alert subject
#            $2 - alert message body
# Returns:   exit status of the webhook request
#######################################
send_alert() {
    local subject="$1"
    local message="$2"
    local timestamp payload

    timestamp=$(date '+%Y-%m-%d %H:%M:%S')

    # E-mail notification
    printf 'Time: %s\n\n%s\n' "$timestamp" "$message" |
        mail -s "[Redis Alert] $subject" "$ALERT_EMAIL"

    # WeCom notification. Subject and message are JSON-escaped so quotes or
    # backslashes in them cannot corrupt the payload.
    printf -v payload \
        '{"msgtype": "markdown", "markdown": {"content": "**Redis告警**\\n> **主题**: %s\\n> **详情**: %s\\n> **时间**: %s"}}' \
        "$(_alert_json_escape "$subject")" \
        "$(_alert_json_escape "$message")" \
        "$timestamp"

    # --max-time keeps an unresponsive webhook from stalling the monitor loop
    curl -sS --max-time 10 -X POST "https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=YOUR_KEY" \
        -H "Content-Type: application/json" \
        -d "$payload" > /dev/null 2>&1
}

# 主监控循环
# Main monitoring loop: run the replication check every 30 seconds and
# attempt an automatic repair whenever the check reports a failure.
while :; do
    clear
    printf '%s\n' "[$(date '+%Y-%m-%d %H:%M:%S')] Checking Redis replication..."

    STATUS=0
    check_replication_status || STATUS=$?

    if ((STATUS != 0)); then
        echo -e "\n${YELLOW}Trying auto-repair...${NC}"
        if ! auto_fix_replication; then
            echo -e "${RED}Auto-repair failed, manual intervention required${NC}"
        fi
    fi

    echo -e "\n${YELLOW}Next check in 30 seconds...${NC}"
    sleep 30
done

三、Redis哨兵模式(Sentinel)

3.1 哨兵架构与原理

架构:
[Client] → [Sentinel1] ↔ [Sentinel2] ↔ [Sentinel3](奇数个,如3或5)
               ↓             ↓             ↓
           [Master] ← 监控和故障发现 → [Slave1, Slave2]

3.2 哨兵部署配置

3.2.1 哨兵配置文件

# sentinel-26379.conf
# --- Basic settings ---
port 26379
bind 0.0.0.0
daemonize yes
pidfile "/var/run/redis-sentinel-26379.pid"
logfile "/var/log/redis/sentinel-26379.log"
dir "/var/lib/redis/sentinel"

# --- Monitoring ---
sentinel monitor mymaster 192.168.1.100 6379 2
# mymaster:      name of the monitored master (used by clients and by every
#                other "sentinel ... mymaster" directive)
# 192.168.1.100: master IP
# 6379:          master port
# 2:             quorum - number of Sentinels that must agree the master is
#                unreachable before a failover is started

# --- Failure detection ---
sentinel down-after-milliseconds mymaster 5000
# No valid reply for 5000 ms => the master is considered subjectively down

sentinel failover-timeout mymaster 60000
# Failover timeout, in milliseconds

sentinel parallel-syncs mymaster 1
# Number of replicas that may resync with the new master at the same time
# during a failover

# --- Authentication ---
sentinel auth-pass mymaster MasterRedisPass123

# --- Replica and Sentinel discovery ---
# Sentinel discovers replicas and the other Sentinels by itself and rewrites
# this file with known-slave / known-sentinel entries (including the full
# run IDs). The lines below only illustrate what that generated state looks
# like; they are commented out because the truncated run IDs are
# placeholders. The Sentinel addresses match the ones the client examples
# in this document connect to.
# sentinel known-slave mymaster 192.168.1.101 6380
# sentinel known-slave mymaster 192.168.1.102 6381
# sentinel known-sentinel mymaster 192.168.1.111 26380 6a5b8d...
# sentinel known-sentinel mymaster 192.168.1.112 26381 8c7d9e...

# --- Hardening ---
# Disallow changing the script settings below at runtime via SENTINEL SET
sentinel deny-scripts-reconfig yes

# --- Notification scripts (optional) ---
# NOTE(review): the referenced scripts are expected to exist and be
# executable when Sentinel loads this file - remove these lines if unused.
sentinel notification-script mymaster /etc/redis/notify.sh
sentinel client-reconfig-script mymaster /etc/redis/reconfig.sh

3.2.2 多哨兵节点配置

# sentinel-26380.conf
# Second Sentinel instance: own port, pid file, log file and working
# directory; monitors the same master with the same quorum (2).
port 26380
bind 0.0.0.0
daemonize yes
pidfile "/var/run/redis-sentinel-26380.pid"
logfile "/var/log/redis/sentinel-26380.log"
dir "/var/lib/redis/sentinel-26380"

sentinel monitor mymaster 192.168.1.100 6379 2
sentinel down-after-milliseconds mymaster 5000
sentinel auth-pass mymaster MasterRedisPass123

# sentinel-26381.conf
# Third Sentinel instance: same monitoring settings, again with its own
# port, pid file, log file and working directory.
port 26381
bind 0.0.0.0
daemonize yes
pidfile "/var/run/redis-sentinel-26381.pid"
logfile "/var/log/redis/sentinel-26381.log"
dir "/var/lib/redis/sentinel-26381"

sentinel monitor mymaster 192.168.1.100 6379 2
sentinel down-after-milliseconds mymaster 5000
sentinel auth-pass mymaster MasterRedisPass123

3.2.3 启动哨兵

# 启动所有哨兵节点
redis-sentinel /etc/redis/sentinel-26379.conf
redis-sentinel /etc/redis/sentinel-26380.conf
redis-sentinel /etc/redis/sentinel-26381.conf

# 或者使用Redis Server模式启动
redis-server /etc/redis/sentinel-26379.conf --sentinel

3.3 哨兵操作命令

# 连接哨兵
redis-cli -h 192.168.1.110 -p 26379

# 查看哨兵信息
> SENTINEL masters              # 查看监控的主节点
> SENTINEL master mymaster      # 查看指定主节点信息
> SENTINEL slaves mymaster      # 查看从节点信息
> SENTINEL sentinels mymaster   # 查看其他哨兵信息
> SENTINEL get-master-addr-by-name mymaster  # 获取当前主节点地址

# 手动故障转移
> SENTINEL failover mymaster    # 强制故障转移

# 监控哨兵状态
> INFO sentinel
# 输出:
# sentinel_masters:1
# sentinel_tilt:0
# sentinel_running_scripts:0
# sentinel_scripts_queue_length:0
# sentinel_simulate_failure_flags:0
# master0:name=mymaster,status=ok,address=192.168.1.100:6379,slaves=2,sentinels=3

# 查看哨兵日志
tail -f /var/log/redis/sentinel-26379.log

3.4 客户端连接哨兵

// Spring Boot配置
/**
 * Spring configuration that connects to Redis through Sentinel using Jedis.
 */
@Configuration
public class RedisSentinelConfig {

    /**
     * Builds a pooled Jedis connection factory that locates the master
     * named "mymaster" through three Sentinel nodes.
     */
    @Bean
    public RedisConnectionFactory redisConnectionFactory() {
        // Sentinel topology: master name plus the Sentinel endpoints
        RedisSentinelConfiguration topology = new RedisSentinelConfiguration();
        topology.master("mymaster");
        topology.sentinel("192.168.1.110", 26379);
        topology.sentinel("192.168.1.111", 26380);
        topology.sentinel("192.168.1.112", 26381);
        topology.setPassword(RedisPassword.of("MasterRedisPass123"));

        // Client settings: 5 s connect timeout, 3 s read timeout, pooling on
        Duration connectTimeout = Duration.ofSeconds(5);
        Duration readTimeout = Duration.ofSeconds(3);
        JedisClientConfiguration clientSettings = JedisClientConfiguration.builder()
            .connectTimeout(connectTimeout)
            .readTimeout(readTimeout)
            .usePooling()
            .build();

        return new JedisConnectionFactory(topology, clientSettings);
    }

    /**
     * Builds a RedisTemplate with String keys and JSON-serialized values on
     * top of the Sentinel-backed connection factory.
     */
    @Bean
    public RedisTemplate<String, Object> redisTemplate() {
        RedisTemplate<String, Object> redis = new RedisTemplate<>();
        redis.setConnectionFactory(redisConnectionFactory());
        redis.setKeySerializer(new StringRedisSerializer());
        redis.setValueSerializer(new GenericJackson2JsonRedisSerializer());
        return redis;
    }
}
# Python client: connect through Sentinel
import redis
from redis.sentinel import Sentinel

# Sentinel endpoints as (host, port) pairs
SENTINEL_NODES = [
    ('192.168.1.110', 26379),
    ('192.168.1.111', 26380),
    ('192.168.1.112', 26381),
]

# Create the Sentinel client
sentinel = Sentinel(
    SENTINEL_NODES,
    socket_timeout=0.5,
    password='MasterRedisPass123',
)

# Connection to the current master (used for writes)
master = sentinel.master_for('mymaster', socket_timeout=0.5)
master.set('key', 'value')

# Connection to a replica (used for reads)
slave = sentinel.slave_for('mymaster', socket_timeout=0.5)
value = slave.get('key')

3.5 哨兵监控与告警脚本

#!/bin/bash
# monitor_redis_sentinel.sh
#
# Settings for the Sentinel monitor. MASTER_NAME, ALERT_EMAIL and
# DINGDING_WEBHOOK can be overridden from the environment; the defaults are
# the example values used throughout this document. The webhook URL carries
# an access token, so prefer supplying it via the environment.

# Sentinel endpoints to poll, as "host:port"
SENTINELS=(
    "192.168.1.110:26379"
    "192.168.1.111:26380"
    "192.168.1.112:26381"
)
MASTER_NAME="${MASTER_NAME:-mymaster}"
ALERT_EMAIL="${ALERT_EMAIL:-devops@company.com}"
DINGDING_WEBHOOK="${DINGDING_WEBHOOK:-https://oapi.dingtalk.com/robot/send?access_token=YOUR_TOKEN}"

# Print the value of field $2 from a redis-cli INFO reply passed as $1.
# INFO lines have the form "name:value" and end in CRLF, so carriage
# returns are stripped and the field name is matched exactly.
_sentinel_info_field() {
    printf '%s\n' "$1" | tr -d '\r' | awk -F: -v key="$2" '$1 == key { print $2; exit }'
}

# Print the value of field $2 from a flat "SENTINEL master" reply ($1), in
# which redis-cli prints field names on odd lines and values on even lines.
# Matching whole odd lines avoids substring hits such as "port" inside
# "role-reported".
_sentinel_reply_field() {
    printf '%s\n' "$1" | tr -d '\r' |
        awk -v key="$2" 'NR % 2 == 1 && $0 == key { if ((getline value) > 0) print value; exit }'
}

#######################################
# Check every configured Sentinel: liveness, TILT mode and the state of
# the monitored master as seen by that Sentinel.
# Globals (read): SENTINELS, MASTER_NAME
# Calls:     send_alert (defined elsewhere in this script)
# Outputs:   human-readable report on stdout
#######################################
check_sentinel_status() {
    local sentinel host port info
    local master_count tilt_status
    local master_info master_flags master_status master_addr master_port

    echo "=== Redis Sentinel Status Check ==="

    for sentinel in "${SENTINELS[@]}"; do
        host=${sentinel%%:*}
        port=${sentinel##*:}

        echo -e "\n[Sentinel: $host:$port]"

        # Liveness
        if ! redis-cli -h "$host" -p "$port" PING 2>/dev/null | grep -q "PONG"; then
            echo "✗ Sentinel is down"
            send_alert "Sentinel Down" "哨兵节点 $host:$port 不可用"
            continue
        fi

        # General Sentinel information
        info=$(redis-cli -h "$host" -p "$port" INFO sentinel 2>/dev/null)
        if [[ -z $info ]]; then
            echo "✗ Cannot get sentinel info"
            continue
        fi

        master_count=$(_sentinel_info_field "$info" sentinel_masters)
        tilt_status=$(_sentinel_info_field "$info" sentinel_tilt)

        echo "✓ Sentinel is running"
        echo "  Master count: $master_count"
        echo "  Tilt status: $tilt_status"

        # TILT mode (the value is compared after the CR has been stripped,
        # otherwise "0\r" never equals "0")
        if [[ $tilt_status != "0" ]]; then
            echo "⚠ Sentinel is in TILT mode"
            send_alert "Sentinel TILT" "哨兵 $host:$port 进入TILT模式"
        fi

        # Master state as seen by this Sentinel
        master_info=$(redis-cli -h "$host" -p "$port" SENTINEL master "$MASTER_NAME" 2>/dev/null)
        if [[ -n $master_info ]]; then
            master_addr=$(_sentinel_reply_field "$master_info" ip)
            master_port=$(_sentinel_reply_field "$master_info" port)
            master_flags=$(_sentinel_reply_field "$master_info" flags)

            # The reply has no "status" field; health is derived from the
            # flags: s_down / o_down mark a subjectively / objectively down
            # master, and a missing flags field means the reply could not
            # be parsed.
            case "$master_flags" in
                "") master_status="unknown" ;;
                *s_down* | *o_down*) master_status="$master_flags" ;;
                *) master_status="ok" ;;
            esac

            echo "  Master status: $master_status"
            echo "  Master address: $master_addr:$master_port"

            if [[ $master_status != "ok" ]]; then
                send_alert "Master Status Abnormal" "主节点状态异常: $master_status"
            fi
        fi
    done
}

# Newest failover-related log line that has already been alerted on, keyed
# by log file path. Lets the monitor alert once per new event instead of
# on every polling cycle.
declare -A _FAILOVER_LAST_ALERTED=()

#######################################
# Report recent failover events found in the Sentinel log files.
# The logs are read from the local filesystem, so only Sentinels whose log
# is readable on this machine are inspected; the others are reported and
# skipped.
# Globals (read):    SENTINELS, SENTINEL_LOG_DIR (optional, defaults to
#                    /var/log/redis)
# Globals (written): _FAILOVER_LAST_ALERTED
# Calls:     send_alert (defined elsewhere in this script)
# Outputs:   matching log lines on stdout
#######################################
check_failover_status() {
    local log_dir="${SENTINEL_LOG_DIR:-/var/log/redis}"
    local sentinel host port log_file entries newest

    echo -e "\n=== Failover History ==="

    for sentinel in "${SENTINELS[@]}"; do
        host=${sentinel%%:*}
        port=${sentinel##*:}
        log_file="$log_dir/sentinel-$port.log"

        if [[ ! -r $log_file ]]; then
            echo "[Sentinel $host:$port] log not readable locally: $log_file"
            continue
        fi

        # Failover-related lines among the last 20 log lines
        entries=$(tail -n 20 "$log_file" | grep -E "failover|switch-master")
        [[ -n $entries ]] || continue

        echo -e "\n[Sentinel $host:$port recent failover events:]"
        echo "$entries"

        # Alert on any matching event (including a bare +switch-master),
        # but only when the newest one has not been reported yet.
        newest=${entries##*$'\n'}
        if [[ ${_FAILOVER_LAST_ALERTED[$log_file]:-} != "$newest" ]]; then
            _FAILOVER_LAST_ALERTED[$log_file]=$newest
            send_alert "Recent Failover Detected" "哨兵 $host:$port 检测到最近的故障转移"
        fi
    done
}

#######################################
# Verify that all reachable Sentinels report the same master address.
# Globals (read): SENTINELS, MASTER_NAME
# Calls:     send_alert (defined elsewhere in this script)
# Outputs:   per-Sentinel view and the verdict on stdout
# Returns:   0 when every answering Sentinel agrees,
#            1 when they disagree or none of them returned an address
#######################################
check_sentinel_consensus() {
    local -A votes=()
    local sentinel host port reply master_ip master_port key

    echo -e "\n=== Sentinel Consensus Check ==="

    for sentinel in "${SENTINELS[@]}"; do
        host=${sentinel%%:*}
        port=${sentinel##*:}

        # The reply is the master IP and port as two whitespace-separated
        # tokens
        reply=$(redis-cli -h "$host" -p "$port" SENTINEL get-master-addr-by-name "$MASTER_NAME" 2>/dev/null)
        read -r -d '' master_ip master_port _ < <(printf '%s' "$reply" | tr -d '\r')

        # Skip empty answers and error replies (their second token is not a
        # port number) instead of counting them as a master address.
        if [[ -z $master_ip || ! $master_port =~ ^[0-9]+$ ]]; then
            echo "Sentinel $host:$port did not report a master address"
            continue
        fi

        key="$master_ip:$master_port"
        votes[$key]=$((${votes[$key]:-0} + 1))

        echo "Sentinel $host:$port thinks master is: $key"
    done

    # No answer at all is a failure, not silent success
    if ((${#votes[@]} == 0)); then
        echo "✗ No sentinel reported a master address"
        send_alert "Sentinel Consensus Unknown" "没有哨兵节点返回主节点地址"
        return 1
    fi

    if ((${#votes[@]} > 1)); then
        echo "⚠ Sentinel consensus broken! Different masters detected:"
        for key in "${!votes[@]}"; do
            echo "  $key: ${votes[$key]} votes"
        done
        send_alert "Sentinel Consensus Broken" "哨兵节点间主节点信息不一致"
        return 1
    fi

    for key in "${!votes[@]}"; do
        echo "✓ All sentinels agree master is: $key"
    done
    return 0
}

# Escape a string so it can be embedded inside a JSON string literal.
_alert_json_escape() {
    local s=$1
    s=${s//\\/\\\\}
    s=${s//\"/\\\"}
    s=${s//$'\n'/\\n}
    s=${s//$'\r'/}
    s=${s//$'\t'/\\t}
    printf '%s' "$s"
}

#######################################
# Send an alert by e-mail and to the DingTalk (钉钉) webhook.
# Globals (read): ALERT_EMAIL, DINGDING_WEBHOOK
# Arguments: $1 - alert subject
#            $2 - alert message body
# Returns:   exit status of the webhook request
#######################################
send_alert() {
    local subject="$1"
    local message="$2"
    local timestamp payload

    timestamp=$(date '+%Y-%m-%d %H:%M:%S')

    # E-mail notification
    printf 'Time: %s\n\n%s\n' "$timestamp" "$message" |
        mail -s "[Redis Sentinel Alert] $subject" "$ALERT_EMAIL"

    # DingTalk notification. Subject and message are JSON-escaped so quotes
    # or backslashes in them cannot corrupt the payload.
    printf -v payload \
        '{"msgtype": "markdown", "markdown": {"title": "Redis哨兵告警", "text": "## Redis哨兵告警\\n**主题**: %s\\n**详情**: %s\\n**时间**: %s\\n"}}' \
        "$(_alert_json_escape "$subject")" \
        "$(_alert_json_escape "$message")" \
        "$timestamp"

    # The URL is quoted because it contains '?', which the shell would
    # otherwise treat as a glob character; --max-time keeps an unresponsive
    # webhook from stalling the monitor loop.
    curl -sS --max-time 10 -X POST "$DINGDING_WEBHOOK" \
        -H 'Content-Type: application/json' \
        -d "$payload" > /dev/null 2>&1
}

# 主监控循环
# Main monitoring loop: run all three Sentinel checks every 60 seconds.
while :; do
    clear
    printf '[%s] Checking Redis Sentinel cluster...\n' "$(date '+%Y-%m-%d %H:%M:%S')"

    for check in check_sentinel_status check_failover_status check_sentinel_consensus; do
        "$check"
    done

    echo -e "\nNext check in 60 seconds..."
    sleep 60
done

四、Redis集群模式(Redis Cluster)

4.1 Redis Cluster架构

Redis Cluster架构:
- 数据分片:16384个哈希槽(hash slot)
- 高可用:每个分片都有主从复制
- 自动故障转移:类似哨兵机制
- 客户端路由:客户端缓存slot映射

4.2 Redis Cluster部署

4.2.1 集群节点配置

# redis-cluster-7000.conf (master node 1)
#
# NOTE: redis.conf only treats a line as a comment when the line itself
# starts with '#'. Text placed after a directive on the same line is parsed
# as extra arguments and the server rejects the file at startup, so every
# comment below sits on its own line.
port 7000
bind 0.0.0.0
daemonize yes
pidfile /var/run/redis_7000.pid
logfile "/var/log/redis/redis-7000.log"
dir /var/lib/redis/7000

# --- Cluster ---
cluster-enabled yes
cluster-config-file nodes-7000.conf
# Node timeout, in milliseconds
cluster-node-timeout 15000
# Replica validity factor
cluster-replica-validity-factor 10
# Migration barrier
cluster-migration-barrier 1
# Require every hash slot to be covered
cluster-require-full-coverage yes

# --- Persistence ---
appendonly yes
appendfsync everysec

# --- Memory ---
maxmemory 4gb
maxmemory-policy allkeys-lru

# --- Performance ---
tcp-backlog 511
timeout 0
tcp-keepalive 300

# --- Security ---
requirepass ClusterRedisPass123
masterauth ClusterRedisPass123

# redis-cluster-7001.conf (labelled "replica node 1" in the original text)
# NOTE(review): this file contains nothing that makes the node a replica.
# In the sample output of the `redis-cli --cluster create` command shown
# later in this document, 192.168.1.101:7001 is listed as a master
# (Master[1]) and ports 7003-7005 are the replicas - confirm the intended
# role.
port 7001
bind 0.0.0.0
daemonize yes
pidfile /var/run/redis_7001.pid
logfile "/var/log/redis/redis-7001.log"
dir /var/lib/redis/7001

cluster-enabled yes
cluster-config-file nodes-7001.conf
cluster-node-timeout 15000
requirepass ClusterRedisPass123
masterauth ClusterRedisPass123

# 继续配置其他节点:7002-7005

4.2.2 创建集群

# 启动所有节点(此循环会在当前主机上启动全部 6 个实例;若节点分布在不同主机,请在各主机上分别启动对应端口的实例)
for port in {7000..7005}; do
    redis-server /etc/redis/redis-cluster-$port.conf
done

# 使用redis-cli创建集群(Redis 5+)
# 格式:redis-cli --cluster create host1:port1 ... hostN:portN --cluster-replicas 1
redis-cli --cluster create \
  192.168.1.100:7000 \
  192.168.1.101:7001 \
  192.168.1.102:7002 \
  192.168.1.103:7003 \
  192.168.1.104:7004 \
  192.168.1.105:7005 \
  --cluster-replicas 1 \
  -a ClusterRedisPass123

# 输出示例:
# >>> Performing hash slots allocation on 6 nodes...
# Master[0] -> Slots 0 - 5460
# Master[1] -> Slots 5461 - 10922
# Master[2] -> Slots 10923 - 16383
# Adding replica 192.168.1.103:7003 to 192.168.1.100:7000
# Adding replica 192.168.1.104:7004 to 192.168.1.101:7001
# Adding replica 192.168.1.105:7005 to 192.168.1.102:7002
# >>> Trying to optimize slaves allocation for anti-affinity
# >>> Check for open slots...
# >>> Check slots coverage...
# [OK] All 16384 slots covered.

# 验证集群状态
redis-cli -c -h 192.168.1.100 -p 7000 -a ClusterRedisPass123
> CLUSTER INFO
# cluster_state:ok
# cluster_slots_assigned:16384
# cluster_slots_ok:16384
# cluster_slots_pfail:0
# cluster_slots_fail:0
# cluster_known_nodes:6
# cluster_size:3
# cluster_current_epoch:6
# cluster_my_epoch:1
# cluster_stats_messages_ping_sent:...

4.2.3 集群扩容

# 添加新主节点
# 1. 启动新节点(7006作为新主节点)
redis-server /etc/redis/redis-cluster-7006.conf

# 2. 添加到集群
redis-cli --cluster add-node \
  192.168.1.106:7006 \
  192.168.1.100:7000 \
  -a ClusterRedisPass123

# 3. 为新节点分配槽位
# 从现有节点迁移一部分槽位到新节点
redis-cli --cluster reshard \
  192.168.1.100:7000 \
  --cluster-from node-id1,node-id2 \
  --cluster-to node-id-new \
  --cluster-slots 4096 \
  --cluster-yes \
  -a ClusterRedisPass123

# 添加新从节点
# 1. 启动新节点(7007作为从节点)
redis-server /etc/redis/redis-cluster-7007.conf

# 2. 添加到集群并指定主节点
redis-cli --cluster add-node \
  192.168.1.107:7007 \
  192.168.1.100:7000 \
  --cluster-slave \
  --cluster-master-id <master-node-id> \
  -a ClusterRedisPass123

4.3 集群操作与监控

# 连接集群(-c 参数启用集群模式)
redis-cli -c -h 192.168.1.100 -p 7000 -a ClusterRedisPass123

# 集群操作命令
> CLUSTER NODES          # 查看所有节点信息
> CLUSTER INFO           # 查看集群信息
> CLUSTER SLOTS          # 查看槽位分布
> CLUSTER KEYSLOT key    # 查看key所在的槽位

# 节点管理
> CLUSTER MEET ip port   # 添加节点到集群
> CLUSTER FORGET node-id # 从集群移除节点
> CLUSTER REPLICATE node-id  # 设置为指定节点的从节点

# 槽位管理
> CLUSTER ADDSLOTS slot [slot ...]      # 分配槽位
> CLUSTER DELSLOTS slot [slot ...]      # 移除槽位
> CLUSTER SETSLOT slot NODE node-id     # 设置槽位所属节点
> CLUSTER SETSLOT slot MIGRATING node-id # 迁移槽位(源)
> CLUSTER SETSLOT slot IMPORTING node-id # 迁移槽位(目标)

# 故障转移
> CLUSTER FAILOVER [FORCE|TAKEOVER]     # 手动故障转移

4.4 Java客户端连接Redis Cluster

@Configuration
public class RedisClusterConfig {
    
    @Bean
    public RedisConnectionFactory redisConnectionFactory() {
        RedisClusterConfiguration clusterConfig = new RedisClusterConfiguration();
        
        // 添加集群节点(只需要添加部分节点,客户端会自动发现其他节点)
        clusterConfig.addClusterNode(new RedisNode("192.168.1.100", 7000));
        clusterConfig.addClusterNode(new RedisNode("192.168.1.101", 7001));
        clusterConfig.addClusterNode(new RedisNode("192.168.1.102", 7002));
        
        // 配置密码
        clusterConfig.setPassword(RedisPassword.of("ClusterRedisPass123"));
        
        // 集群拓扑刷新设置
        clusterConfig.setMaxRedirects(3);  // 最大重定向次数
        
        // Jedis配置
        JedisPoolConfig poolConfig = new JedisPoolConfig();
        poolConfig.setMaxTotal(100);
        poolConfig.setMaxIdle(20);
        poolConfig.setMinIdle(5);
        poolConfig.setMaxWaitMillis(3000);
        poolConfig.setTestOnBorrow(true);
        
        JedisClientConfiguration jedisConfig = JedisClientConfiguration.builder()
            .connectTimeout(Duration.ofSeconds(5))
            .readTimeout(Duration.ofSeconds(3))
            .usePooling()
            .poolConfig(poolConfig)
            .build();
        
        return new JedisConnectionFactory(clusterConfig, jedisConfig);
    }
    
    @Bean
    public RedisTemplate<String, Object> redisTemplate() {
        RedisTemplate<String, Object> template = new RedisTemplate<>();
        template.setConnectionFactory(redisConnectionFactory());
        
        // 使用String序列化器
        template.setKeySerializer(new StringRedisSerializer());
        template.setValueSerializer(new GenericJackson2JsonRedisSerializer());
        template.setHashKeySerializer(new StringRedisSerializer