The Complete Guide to MySQL High Availability and Scaling

Posted by zengchengjie on Saturday, January 10, 2026

1. The Evolution of MySQL High-Availability Architecture

Evolution path:
Standalone MySQL → master-slave replication → replication + read/write splitting → MHA/MMM high availability → InnoDB Cluster → cloud-managed databases

2. Replication in Depth

2.1 Replication Principles and Architecture

-- Replication principle: based on the binary log (binlog)
Master: writes → binary log (binlog)
Slave: IO thread pulls the binlog → SQL thread replays it → data stays in sync
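
Because everything flows through the binlog, the mechanism is easy to observe directly. A minimal sketch (assuming the binlog path configured in 2.2.1, the bundled mysqlbinlog tool, and that client credentials are supplied however your environment requires):

# Inspect the events the master wrote; with ROW format, -vv decodes row images into pseudo-SQL
mysqlbinlog --base64-output=decode-rows -vv /var/lib/mysql/mysql-bin.000003 | head -50

# On the master, each connected slave's IO thread shows up as a "Binlog Dump" thread
mysql -e "SHOW PROCESSLIST" | grep -i 'binlog dump'

# On the slave, the IO and SQL threads appear as "system user" sessions
mysql -e "SHOW PROCESSLIST" | grep -i 'system user'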

2.2 Hands-On Replication Setup

2.2.1 Master Configuration

# /etc/mysql/my.cnf or /etc/my.cnf
[mysqld]
# Server ID, must be unique within the cluster
server-id = 1
# Enable the binary log
log_bin = /var/lib/mysql/mysql-bin.log
# Binary log format: ROW/MIXED/STATEMENT
binlog_format = ROW
# Days to keep binary logs
expire_logs_days = 7
# Maximum size of a single binary log file
max_binlog_size = 100M
# Databases to replicate (optional)
binlog_do_db = order_db
# Databases to exclude from replication (optional)
binlog_ignore_db = test_db
# The master stays writable; set read_only = 1 on slaves instead
read_only = 0
# Enable GTIDs (recommended)
gtid_mode = ON
enforce_gtid_consistency = ON
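
After restarting mysqld, it is worth confirming the settings actually took effect. A quick check (a sketch; assumes root access from the shell):

# Verify that binlog and GTID settings are active
mysql -u root -p -e "SHOW VARIABLES WHERE Variable_name IN ('log_bin', 'binlog_format', 'gtid_mode', 'server_id')"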

2.2.2 Create a Replication User

-- Run on the master
-- Create a user dedicated to replication
CREATE USER 'repl'@'%' IDENTIFIED BY 'ReplPassword123!';
GRANT REPLICATION SLAVE ON *.* TO 'repl'@'%';
FLUSH PRIVILEGES;

-- Check the master's status
SHOW MASTER STATUS;
-- Sample output:
-- +------------------+----------+--------------+------------------+-------------------+
-- | File             | Position | Binlog_Do_DB | Binlog_Ignore_DB | Executed_Gtid_Set |
-- +------------------+----------+--------------+------------------+-------------------+
-- | mysql-bin.000003 |      107 | order_db     | test_db          |                   |
-- +------------------+----------+--------------+------------------+-------------------+

2.2.3 Slave Configuration

# Slave configuration file
[mysqld]
server-id = 2  # must differ from the master and every other node
log_bin = /var/lib/mysql/mysql-bin.log
relay_log = /var/lib/mysql/relay-bin.log
read_only = 1  # slave is read-only
super_read_only = 1  # even SUPER users cannot write
log_slave_updates = 1  # slave also writes its own binlog (needed for cascading replication)
gtid_mode = ON
enforce_gtid_consistency = ON

2.2.4 Point the Slave at the Master

-- Run on the slave

-- Option 1: classic replication based on binlog file and position
CHANGE MASTER TO
  MASTER_HOST = '192.168.1.100',
  MASTER_PORT = 3306,
  MASTER_USER = 'repl',
  MASTER_PASSWORD = 'ReplPassword123!',
  MASTER_LOG_FILE = 'mysql-bin.000003',
  MASTER_LOG_POS = 107;

-- Option 2: GTID-based replication (recommended)
CHANGE MASTER TO
  MASTER_HOST = '192.168.1.100',
  MASTER_PORT = 3306,
  MASTER_USER = 'repl',
  MASTER_PASSWORD = 'ReplPassword123!',
  MASTER_AUTO_POSITION = 1;  -- position is resolved automatically from GTIDs

-- Start replication
START SLAVE;

-- Check the replication status
SHOW SLAVE STATUS\G
-- Key fields to check:
-- Slave_IO_Running: Yes
-- Slave_SQL_Running: Yes
-- Seconds_Behind_Master: 0  # replication lag in seconds
-- Last_IO_Error: 
-- Last_SQL_Error:

-- List all slaves connected to the master (run on the master)
SHOW SLAVE HOSTS;

2.3 Comparison of Replication Modes

Mode              | Pros                                              | Cons                                      | Typical use
Asynchronous      | Best performance, no extra latency on the master  | Committed writes can be lost on failover  | Most workloads
Semi-synchronous  | Better data safety                                | Slightly lower write performance          | Finance and trading systems
Group Replication | Strong consistency, built-in high availability    | Complex to configure                      | Business-critical systems

2.3.1 Semi-Synchronous Replication Setup

-- Install the plugins: the master plugin on the master, the slave plugin on each slave
-- (install both on any node that might later be promoted)
INSTALL PLUGIN rpl_semi_sync_master SONAME 'semisync_master.so';
INSTALL PLUGIN rpl_semi_sync_slave SONAME 'semisync_slave.so';

-- On the master
SET GLOBAL rpl_semi_sync_master_enabled = 1;
SET GLOBAL rpl_semi_sync_master_timeout = 1000;  -- wait at most 1 second before falling back to async

-- On each slave
SET GLOBAL rpl_semi_sync_slave_enabled = 1;

-- Restart the IO thread so the change takes effect
STOP SLAVE IO_THREAD;
START SLAVE IO_THREAD;

-- Check status
SHOW VARIABLES LIKE 'rpl_semi_sync%';
SHOW STATUS LIKE 'Rpl_semi_sync%';

2.4 Monitoring and Management Script

#!/bin/bash
# monitor_replication.sh

MYSQL_USER="monitor"
MYSQL_PASS="MonitorPass123"
MYSQL_HOST="localhost"
ALERT_EMAIL="dba@company.com"

check_replication() {
    # Fetch the replication status
    STATUS=$(mysql -u$MYSQL_USER -p$MYSQL_PASS -h$MYSQL_HOST -e "SHOW SLAVE STATUS\G" 2>/dev/null)
    
    if [ -z "$STATUS" ]; then
        echo "ERROR: Cannot connect to MySQL"
        return 1
    fi
    
    # Parse the key metrics
    IO_RUNNING=$(echo "$STATUS" | grep "Slave_IO_Running" | awk '{print $2}')
    SQL_RUNNING=$(echo "$STATUS" | grep "Slave_SQL_Running" | awk '{print $2}')
    DELAY=$(echo "$STATUS" | grep "Seconds_Behind_Master" | awk '{print $2}')
    LAST_ERROR=$(echo "$STATUS" | grep "Last_Error" | awk -F': ' '{print $2}')
    
    # Check the replication threads
    if [ "$IO_RUNNING" != "Yes" ] || [ "$SQL_RUNNING" != "Yes" ]; then
        send_alert "Replication Stopped" "IO: $IO_RUNNING, SQL: $SQL_RUNNING, Error: $LAST_ERROR"
        return 1
    fi
    
    # Check replication lag (Seconds_Behind_Master is NULL while replication is broken)
    if [ "$DELAY" != "NULL" ] && [ "$DELAY" -gt 60 ]; then  # more than 60 seconds behind
        send_alert "Replication Delay High" "Delay: $DELAY seconds"
        return 2
    fi
    
    echo "OK: Replication running normally, Delay: $DELAY seconds"
    return 0
}

send_alert() {
    SUBJECT="$1"
    MESSAGE="$2"
    echo "ALERT: $SUBJECT - $MESSAGE" | mail -s "[MySQL Alert] $SUBJECT" $ALERT_EMAIL
    # Optionally push to DingTalk / WeCom as well
    curl -X POST -H "Content-Type: application/json" \
         -d "{\"msgtype\": \"text\", \"text\": {\"content\": \"MySQL alert: $SUBJECT\\n$MESSAGE\"}}" \
         https://oapi.dingtalk.com/robot/send?access_token=YOUR_TOKEN
}

# Attempt to auto-repair common replication errors
auto_repair_replication() {
    # Error codes considered safe to skip (space-separated so the loop below iterates correctly)
    SKIP_ERRORS="1062 1032"  # 1062 duplicate key, 1032 record not found
    
    for ERROR_CODE in $SKIP_ERRORS; do
        ERROR_COUNT=$(mysql -u$MYSQL_USER -p$MYSQL_PASS -h$MYSQL_HOST -e \
            "SELECT COUNT(*) FROM performance_schema.replication_applier_status_by_worker 
             WHERE LAST_ERROR_NUMBER = $ERROR_CODE" 2>/dev/null | tail -1)
        
        if [ "$ERROR_COUNT" -gt 0 ]; then
            echo "Found error $ERROR_CODE, skipping..."
            # Note: SQL_SLAVE_SKIP_COUNTER only works for position-based replication;
            # with GTIDs, inject an empty transaction for the offending GTID instead
            mysql -u$MYSQL_USER -p$MYSQL_PASS -h$MYSQL_HOST -e \
                "STOP SLAVE; SET GLOBAL SQL_SLAVE_SKIP_COUNTER = 1; START SLAVE;"
            sleep 2
        fi
    done
}

# Main loop
while true; do
    check_replication
    if [ $? -eq 1 ]; then
        auto_repair_replication
        sleep 10
        check_replication
    fi
    sleep 30  # check every 30 seconds
done

3. Read/Write Splitting Implementations

3.1 Read/Write Splitting in the Application Layer

3.1.1 Spring Boot + MyBatis Configuration

@Configuration
@MapperScan(basePackages = "com.example.mapper")
public class DataSourceConfig {
    
    @Value("${spring.datasource.master.url}")
    private String masterUrl;
    
    @Value("${spring.datasource.slave.url}")
    private String slaveUrl;
    
    /**
     * Master data source (writes)
     */
    @Bean(name = "masterDataSource")
    @ConfigurationProperties(prefix = "spring.datasource.master")
    public DataSource masterDataSource() {
        return DataSourceBuilder.create().build();
    }
    
    /**
     * Slave data source (reads).
     * Note: dynamicDataSource() below references this bean unconditionally, so the
     * @ConditionalOnProperty guard only makes sense if that lookup is guarded too.
     */
    @Bean(name = "slaveDataSource")
    @ConfigurationProperties(prefix = "spring.datasource.slave")
    @ConditionalOnProperty(name = "spring.datasource.slave.enabled", havingValue = "true")
    public DataSource slaveDataSource() {
        return DataSourceBuilder.create().build();
    }
    
    /**
     * Dynamic routing data source
     */
    @Primary
    @Bean(name = "dynamicDataSource")
    public DataSource dynamicDataSource() {
        Map<Object, Object> targetDataSources = new HashMap<>(2);
        targetDataSources.put(DataSourceType.MASTER, masterDataSource());
        targetDataSources.put(DataSourceType.SLAVE, slaveDataSource());
        
        DynamicDataSource dynamicDataSource = new DynamicDataSource();
        dynamicDataSource.setTargetDataSources(targetDataSources);
        dynamicDataSource.setDefaultTargetDataSource(masterDataSource());
        
        return dynamicDataSource;
    }
    
    /**
     * Transaction manager
     */
    @Bean
    public PlatformTransactionManager transactionManager() {
        return new DataSourceTransactionManager(dynamicDataSource());
    }
    
    /**
     * MyBatis configuration
     */
    @Bean
    public SqlSessionFactory sqlSessionFactory() throws Exception {
        SqlSessionFactoryBean sessionFactory = new SqlSessionFactoryBean();
        sessionFactory.setDataSource(dynamicDataSource());
        sessionFactory.setMapperLocations(
            new PathMatchingResourcePatternResolver()
                .getResources("classpath:mapper/*.xml"));
        return sessionFactory.getObject();
    }
}

3.1.2 Data Source Routing Context

/**
 * Data source type enum
 */
public enum DataSourceType {
    MASTER,   // master, used for writes
    SLAVE     // slave, used for reads
}

/**
 * Per-thread holder used to switch the active data source
 */
public class DataSourceContextHolder {
    
    private static final ThreadLocal<DataSourceType> CONTEXT_HOLDER = 
        new ThreadLocal<>();
    
    public static void setDataSourceType(DataSourceType dataSourceType) {
        CONTEXT_HOLDER.set(dataSourceType);
    }
    
    public static DataSourceType getDataSourceType() {
        return CONTEXT_HOLDER.get() == null ? 
            DataSourceType.MASTER : CONTEXT_HOLDER.get();
    }
    
    public static void clearDataSourceType() {
        CONTEXT_HOLDER.remove();
    }
}

/**
 * Dynamic data source implementation
 */
public class DynamicDataSource extends AbstractRoutingDataSource {
    
    @Override
    protected Object determineCurrentLookupKey() {
        return DataSourceContextHolder.getDataSourceType();
    }
}

3.1.3 Read/Write Splitting via Custom Annotations

/**
 * Marks a method or class whose queries should go to a slave
 */
@Target({ElementType.METHOD, ElementType.TYPE})
@Retention(RetentionPolicy.RUNTIME)
@Documented
public @interface ReadOnly {
    
    /**
     * Slave name, useful when routing among multiple slaves
     */
    String value() default "";
    
    /**
     * Force routing to a slave even inside a transaction
     */
    boolean force() default false;
}

/**
 * Marks a method or class whose statements must go to the master
 */
@Target({ElementType.METHOD, ElementType.TYPE})
@Retention(RetentionPolicy.RUNTIME)
@Documented
public @interface WriteOnly {
}

/**
 * Aspect that switches the data source
 */
@Aspect
@Component
@Slf4j
public class DataSourceAspect {
    
    @Around("@annotation(readOnly)")
    public Object around(ProceedingJoinPoint joinPoint, ReadOnly readOnly) throws Throwable {
        try {
            // Route to a slave
            if (readOnly.force() || !TransactionSynchronizationManager.isActualTransactionActive()) {
                DataSourceContextHolder.setDataSourceType(DataSourceType.SLAVE);
                log.debug("Switch to slave datasource for method: {}", 
                    joinPoint.getSignature().getName());
            }
            return joinPoint.proceed();
        } finally {
            // Clear the routing context
            DataSourceContextHolder.clearDataSourceType();
        }
    }
    
    @Around("@annotation(writeOnly)")
    public Object around(ProceedingJoinPoint joinPoint, WriteOnly writeOnly) throws Throwable {
        try {
            DataSourceContextHolder.setDataSourceType(DataSourceType.MASTER);
            log.debug("Switch to master datasource for method: {}", 
                joinPoint.getSignature().getName());
            return joinPoint.proceed();
        } finally {
            DataSourceContextHolder.clearDataSourceType();
        }
    }
    
    /**
     * Route automatically based on the method name prefix
     */
    @Before("@within(org.springframework.stereotype.Repository)")
    public void before(JoinPoint joinPoint) {
        String methodName = joinPoint.getSignature().getName();
        
        // Read methods go to a slave
        if (methodName.startsWith("find") || 
            methodName.startsWith("select") || 
            methodName.startsWith("query") || 
            methodName.startsWith("get") || 
            methodName.startsWith("list") || 
            methodName.startsWith("count")) {
            
            if (!TransactionSynchronizationManager.isActualTransactionActive()) {
                DataSourceContextHolder.setDataSourceType(DataSourceType.SLAVE);
                log.debug("Auto switch to slave for read method: {}", methodName);
            }
        } 
        // Write methods go to the master
        else if (methodName.startsWith("insert") || 
                 methodName.startsWith("save") || 
                 methodName.startsWith("update") || 
                 methodName.startsWith("delete") || 
                 methodName.startsWith("remove")) {
            
            DataSourceContextHolder.setDataSourceType(DataSourceType.MASTER);
            log.debug("Auto switch to master for write method: {}", methodName);
        }
    }
    
    @After("@within(org.springframework.stereotype.Repository)")
    public void after(JoinPoint joinPoint) {
        DataSourceContextHolder.clearDataSourceType();
    }
}

3.1.4 Read/Write Handling Inside Transactions

/**
 * Transaction manager extension: pins every statement inside a transaction to the master
 */
@Component
public class EnhancedTransactionManager extends DataSourceTransactionManager {
    
    @Override
    protected void doBegin(Object transaction, TransactionDefinition definition) {
        // Force the master as soon as a transaction begins
        DataSourceContextHolder.setDataSourceType(DataSourceType.MASTER);
        super.doBegin(transaction, definition);
    }
    
    @Override
    protected void doCleanupAfterCompletion(Object transaction) {
        super.doCleanupAfterCompletion(transaction);
        DataSourceContextHolder.clearDataSourceType();
    }
}

/**
 * Service-layer usage example
 */
@Service
@Slf4j
public class OrderService {
    
    @Autowired
    private OrderMapper orderMapper;
    
    /**
     * Write path - automatically routed to the master
     */
    @Transactional
    public Order createOrder(Order order) {
        // Writes automatically go to the master
        orderMapper.insert(order);
        
        // Reads inside the transaction also hit the master (avoids replication lag)
        Order savedOrder = orderMapper.selectById(order.getId());
        
        // Other business logic...
        processOrder(savedOrder);
        
        return savedOrder;
    }
    
    /**
     * Read-only path - routed to a slave
     */
    @ReadOnly
    public Order getOrderById(Long orderId) {
        // The query goes to a slave
        return orderMapper.selectById(orderId);
    }
    
    /**
     * Complex query - forced to a slave
     */
    @ReadOnly(force = true)
    public List<Order> searchOrders(OrderQuery query) {
        // Forced to a slave even when a transaction is active
        return orderMapper.search(query);
    }
    
    /**
     * Mixed flow: read first, then write
     */
    public void updateOrderStatus(Long orderId, String newStatus) {
        // 1. The read goes to a slave
        Order order = getOrderById(orderId);
        
        // 2. The update goes to the master (in a new transaction)
        updateOrder(order, newStatus);
    }
    
    @Transactional
    @WriteOnly
    public void updateOrder(Order order, String newStatus) {
        // Note: this must be public and called through the Spring proxy; when invoked
        // directly from updateOrderStatus() above, @Transactional/@WriteOnly are ignored
        order.setStatus(newStatus);
        orderMapper.updateById(order);
    }
}

3.2 Read/Write Splitting with Middleware

3.2.1 Using ShardingSphere-JDBC

<!-- pom.xml -->
<dependency>
    <groupId>org.apache.shardingsphere</groupId>
    <artifactId>shardingsphere-jdbc-core</artifactId>
    <version>5.3.2</version>
</dependency>

# application-sharding.yml
spring:
  shardingsphere:
    datasource:
      names: master,slave0,slave1
      master:
        type: com.zaxxer.hikari.HikariDataSource
        driver-class-name: com.mysql.cj.jdbc.Driver
        jdbc-url: jdbc:mysql://192.168.1.100:3306/order_db
        username: root
        password: MasterPass123
      slave0:
        type: com.zaxxer.hikari.HikariDataSource
        driver-class-name: com.mysql.cj.jdbc.Driver
        jdbc-url: jdbc:mysql://192.168.1.101:3306/order_db
        username: root
        password: SlavePass123
      slave1:
        type: com.zaxxer.hikari.HikariDataSource
        driver-class-name: com.mysql.cj.jdbc.Driver
        jdbc-url: jdbc:mysql://192.168.1.102:3306/order_db
        username: root
        password: SlavePass123
    
    rules:
      readwrite-splitting:
        data-sources:
          readwrite_ds:
            type: Static
            props:
              write-data-source-name: master
              read-data-source-names: slave0,slave1
            load-balancer-name: round_robin
        
        load-balancers:
          round_robin:
            type: ROUND_ROBIN
      
      # Optional: hint strategy for forcing routes via SQL hints
      hint:
        default-database-strategy:
          hint-algorithm:
            type: CLASS_BASED
            props:
              algorithmClassName: com.example.config.ForceMasterHintAlgorithm
    
    props:
      sql-show: true  # print the routed SQL

3.2.2 Custom Hint Algorithm

public class ForceMasterHintAlgorithm implements HintShardingAlgorithm<String> {
    
    @Override
    public Collection<String> doSharding(
            Collection<String> availableTargetNames, 
            HintShardingValue<String> shardingValue) {
        
        // Force routing to the master
        return availableTargetNames.stream()
            .filter(name -> name.startsWith("master"))
            .collect(Collectors.toList());
    }
    
    @Override
    public void init() {
        // initialization (nothing needed here)
    }
    
    @Override
    public String getType() {
        return "HINT";
    }
}

// Use a hint to force statements onto the master
public class OrderService {
    
    public void forceMasterOperation() {
        try (HintManager hintManager = HintManager.getInstance()) {
            // Route subsequent statements in this scope to the master
            hintManager.setWriteRouteOnly();
            
            // Operations that must hit the master
            orderMapper.updateCriticalData();
        }  // HintManager closes automatically and default routing is restored
    }
}

3.3 Caveats of Read/Write Splitting

3.3.1 Replication Lag

@Component
@Slf4j
public class MasterSlaveDelayHandler {
    
    @Autowired
    private DataSource dataSource;
    
    /**
     * Check the replication lag (must be executed against a slave connection)
     */
    public boolean checkDelay(Long maxDelaySeconds) {
        try (Connection conn = dataSource.getConnection()) {
            Statement stmt = conn.createStatement();
            ResultSet rs = stmt.executeQuery("SHOW SLAVE STATUS");
            
            if (rs.next()) {
                int delay = rs.getInt("Seconds_Behind_Master");
                return delay <= maxDelaySeconds;
            }
        } catch (SQLException e) {
            log.error("Check replication delay failed", e);
        }
        return false;
    }
    
    /**
     * Read-your-own-writes workaround
     * (shown as a nested class for brevity; in practice this would be a top-level bean)
     */
    @Service
    public class OrderServiceWithDelayHandler {
        
        @Autowired
        private MasterSlaveDelayHandler delayHandler;
        
        @Autowired
        private OrderMapper orderMapper;
        
        private final ThreadLocal<Long> lastWriteTime = new ThreadLocal<>();
        
        @Transactional
        public Order createOrderAndRead(Order order) {
            // Write
            orderMapper.insert(order);
            
            // Remember when we last wrote
            lastWriteTime.set(System.currentTimeMillis());
            
            // Read immediately; may have to go to the master
            return readAfterWrite(order.getId());
        }
        
        public Order readAfterWrite(Long orderId) {
            Long writeTime = lastWriteTime.get();
            
            if (writeTime != null) {
                long elapsed = System.currentTimeMillis() - writeTime;
                
                // If the write was very recent, force the read to the master
                if (elapsed < 5000) {  // within 5 seconds
                    try {
                        DataSourceContextHolder.setDataSourceType(DataSourceType.MASTER);
                        return orderMapper.selectById(orderId);
                    } finally {
                        DataSourceContextHolder.clearDataSourceType();
                    }
                }
            }
            
            // Otherwise read from a slave as usual
            return orderMapper.selectById(orderId);
        }
    }
}

3.3.2 Load Balancing Across Multiple Slaves

/**
 * Slave load-balancing strategies
 */
@Component
@Slf4j
public class SlaveLoadBalancer {
    
    private final List<String> slaveNames = Arrays.asList("slave0", "slave1", "slave2");
    private final AtomicInteger counter = new AtomicInteger(0);
    
    // 1. Round-robin
    public String getSlaveByRoundRobin() {
        int index = counter.getAndIncrement() % slaveNames.size();
        return slaveNames.get(index);
    }
    
    // 2. Random
    public String getSlaveByRandom() {
        Random random = new Random();
        return slaveNames.get(random.nextInt(slaveNames.size()));
    }
    
    // 3. Weighted random
    public String getSlaveByWeight() {
        Map<String, Integer> weights = new HashMap<>();
        weights.put("slave0", 3);  // higher-spec server gets more traffic
        weights.put("slave1", 2);
        weights.put("slave2", 1);  // lower-spec server
        
        int totalWeight = weights.values().stream().mapToInt(Integer::intValue).sum();
        int random = new Random().nextInt(totalWeight);
        
        for (Map.Entry<String, Integer> entry : weights.entrySet()) {
            random -= entry.getValue();
            if (random < 0) {
                return entry.getKey();
            }
        }
        return slaveNames.get(0);
    }
    
    // 4. Pick among healthy slaves only
    public String getHealthySlave() {
        Map<String, Boolean> healthStatus = checkSlaveHealth();
        
        List<String> healthySlaves = healthStatus.entrySet().stream()
            .filter(Map.Entry::getValue)
            .map(Map.Entry::getKey)
            .collect(Collectors.toList());
        
        if (healthySlaves.isEmpty()) {
            throw new RuntimeException("No healthy slave available");
        }
        
        return healthySlaves.get(new Random().nextInt(healthySlaves.size()));
    }
    
    private Map<String, Boolean> checkSlaveHealth() {
        Map<String, Boolean> healthStatus = new HashMap<>();
        
        for (String slave : slaveNames) {
            // getConnection(...) is assumed to resolve a Connection for the named slave
            try (Connection conn = getConnection(slave)) {
                Statement stmt = conn.createStatement();
                ResultSet rs = stmt.executeQuery("SELECT 1");
                healthStatus.put(slave, rs.next());
            } catch (SQLException e) {
                healthStatus.put(slave, false);
                log.warn("Slave {} is unhealthy: {}", slave, e.getMessage());
            }
        }
        
        return healthStatus;
    }
}

4. High Availability with MHA (Master High Availability)

4.1 MHA Architecture

Application layer
   ↓
VIP (Virtual IP) → [Master]
                       ↓ (replication)
                  [Slave1] ← monitored and failed over by MHA Manager
                       ↓ (replication)
                  [Slave2]
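
Because clients always connect through the VIP, a failover is invisible to the application; only the node that owns the VIP changes. A quick sketch for checking which node currently holds it (hosts, the VIP 192.168.1.200, and interface eth0 come from the failover script in 4.4; app_user is a placeholder application account):

# Which node currently owns the VIP?
for host in 192.168.1.100 192.168.1.101 192.168.1.102; do
    echo -n "$host: "
    ssh root@$host "ip addr show eth0 | grep -q '192.168.1.200' && echo 'holds the VIP' || echo '-'"
done

# The application keeps pointing at the VIP before and after a failover
mysql -h 192.168.1.200 -u app_user -p -e "SELECT @@hostname, @@read_only"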

4.2 Installing and Configuring MHA

#!/bin/bash
# install_mha.sh

# 1. Install the Perl dependencies
yum install -y perl-DBD-MySQL perl-Config-Tiny \
    perl-Log-Dispatch perl-Parallel-ForkManager \
    perl-Time-HiRes

# 2. Install MHA Node on every node
wget https://github.com/yoshinorim/mha4mysql-node/releases/download/v0.58/mha4mysql-node-0.58-0.el7.centos.noarch.rpm
rpm -ivh mha4mysql-node-0.58-0.el7.centos.noarch.rpm

# 3. Install MHA Manager on the management node
wget https://github.com/yoshinorim/mha4mysql-manager/releases/download/v0.58/mha4mysql-manager-0.58-0.el7.centos.noarch.rpm
rpm -ivh mha4mysql-manager-0.58-0.el7.centos.noarch.rpm

# 4. Set up passwordless SSH between all nodes
ssh-keygen -t rsa
# Copy the public key to every node
ssh-copy-id -i ~/.ssh/id_rsa.pub root@master
ssh-copy-id -i ~/.ssh/id_rsa.pub root@slave1
ssh-copy-id -i ~/.ssh/id_rsa.pub root@slave2

4.3 MHA Configuration File

# /etc/mha/app1.cnf
[server default]
# MySQL connection settings
user=mha_user
password=MhaPass123
ssh_user=root
repl_user=repl
repl_password=ReplPassword123

# Working directories
manager_workdir=/var/log/mha/app1
manager_log=/var/log/mha/app1/manager.log
remote_workdir=/var/log/mha/app1

# Failover settings
ping_interval=3          # heartbeat interval (seconds)
ping_type=SELECT         # heartbeat check method
master_ip_failover_script=/usr/local/bin/master_ip_failover
master_ip_online_change_script=/usr/local/bin/master_ip_online_change
report_script=/usr/local/bin/send_report
secondary_check_script=masterha_secondary_check -s slave1 -s slave2

# Replication settings
candidate_master=1       # nodes may be promoted to master by default
check_repl_delay=0       # ignore replication delay when choosing the new master
shutdown_script=""
ignore_fail_on_start=0

[server1]
hostname=192.168.1.100
port=3306
candidate_master=1

[server2]
hostname=192.168.1.101
port=3306
candidate_master=1

[server3]
hostname=192.168.1.102
port=3306
no_master=1             # never promote this node to master

4.4 Failover Script

#!/usr/bin/env perl
# master_ip_failover

use strict;
use warnings;
use Getopt::Long;

my $vip = '192.168.1.200/24';
my $interface = 'eth0';
my $ssh_user = "root";

# MHA invokes this script with named options, e.g.
# --command=start|stop|stopssh|status --orig_master_host=... --new_master_host=...
my ( $command, $orig_master_host, $new_master_host );
# Ignore the additional options MHA passes (ips, ports, ssh_user, ...)
Getopt::Long::Configure('pass_through');
GetOptions(
    'command=s'          => \$command,
    'orig_master_host=s' => \$orig_master_host,
    'new_master_host=s'  => \$new_master_host,
);

# Main entry point
sub main {
    if ( $command eq "stop" || $command eq "stopssh" ) {
        # The old master has failed: remove the VIP from it
        remove_vip($orig_master_host);
    }
    elsif ( $command eq "start" ) {
        # A new master has been promoted: attach the VIP to it
        add_vip($new_master_host);
    }
    elsif ( $command eq "status" ) {
        # Health check: confirm the current master still holds the VIP
        check_vip($orig_master_host);
    }
}

# Add the VIP
sub add_vip {
    my $host = shift;
    print "Adding VIP $vip to $host\n";
    
    my $cmd = "ssh $ssh_user\@$host \" /sbin/ip addr add $vip dev $interface \"";
    system($cmd);
    
    # Send gratuitous ARP so the network learns the VIP's new location
    $cmd = "ssh $ssh_user\@$host \" /sbin/arping -c 3 -A -I $interface $vip \"";
    system($cmd);
    
    print "VIP added successfully\n";
}

# Remove the VIP
sub remove_vip {
    my $host = shift;
    print "Removing VIP $vip from $host\n";
    
    my $cmd = "ssh $ssh_user\@$host \" /sbin/ip addr del $vip dev $interface \"";
    system($cmd);
    
    print "VIP removed successfully\n";
}

# Check whether a host still holds the VIP (used for --command=status)
sub check_vip {
    my $host = shift;
    ( my $vip_addr = $vip ) =~ s{/.*}{};
    my $cmd = "ssh $ssh_user\@$host \"/sbin/ip addr show $interface | grep -q $vip_addr\"";
    return system($cmd) == 0 ? 0 : 1;
}

# Run
main();
exit 0;

4.5 MHA Management Commands

# 1. Check SSH connectivity between nodes
masterha_check_ssh --conf=/etc/mha/app1.cnf

# 2. Check replication status
masterha_check_repl --conf=/etc/mha/app1.cnf

# 3. Start MHA monitoring
nohup masterha_manager --conf=/etc/mha/app1.cnf \
    > /var/log/mha/app1/manager.log 2>&1 &

# 4. Check MHA status
masterha_check_status --conf=/etc/mha/app1.cnf

# 5. Stop MHA monitoring
masterha_stop --conf=/etc/mha/app1.cnf

# 6. Manual switchover (online, since --master_state=alive)
masterha_master_switch --conf=/etc/mha/app1.cnf \
    --master_state=alive \
    --new_master_host=192.168.1.101 \
    --orig_master_is_new_slave \
    --interactive=0

# 7. Tail the manager log
tail -f /var/log/mha/app1/manager.log

4.6 MHA Monitoring Script

#!/bin/bash
# monitor_mha.sh

CONFIG_FILE="/etc/mha/app1.cnf"
LOG_FILE="/var/log/mha/app1/manager.log"
ALERT_EMAIL="dba@company.com"
SLACK_WEBHOOK="https://hooks.slack.com/services/xxx"

check_mha_status() {
    STATUS=$(masterha_check_status --conf=$CONFIG_FILE 2>&1)
    
    if echo "$STATUS" | grep -q "stopped"; then
        send_alert "MHA Manager Stopped" "MHA监控已停止,需要手动重启"
        return 1
    elif echo "$STATUS" | grep -q "running"; then
        echo "MHA is running normally"
        return 0
    else
        send_alert "MHA Status Unknown" "无法获取MHA状态: $STATUS"
        return 2
    fi
}

check_master_alive() {
    MASTER_HOST=$(grep -A1 "\[server1\]" $CONFIG_FILE | grep "hostname" | cut -d= -f2)
    
    if mysql -h $MASTER_HOST -u mha_user -p'MhaPass123' -e "SELECT 1" &>/dev/null; then
        echo "Master is alive"
        return 0
    else
        send_alert "Master Database Down" "主库 $MASTER_HOST 无法连接"
        return 1
    fi
}

check_replication_health() {
    # Check replication status on every slave
    SLAVES=$(grep "hostname" $CONFIG_FILE | grep -v "server1" | cut -d= -f2)
    
    for SLAVE in $SLAVES; do
        STATUS=$(mysql -h $SLAVE -u mha_user -p'MhaPass123' -e "SHOW SLAVE STATUS\G" 2>/dev/null)
        
        if [ -z "$STATUS" ]; then
            send_alert "Slave Unreachable" "从库 $SLAVE 无法连接"
            continue
        fi
        
        IO_RUNNING=$(echo "$STATUS" | grep "Slave_IO_Running" | awk '{print $2}')
        SQL_RUNNING=$(echo "$STATUS" | grep "Slave_SQL_Running" | awk '{print $2}')
        DELAY=$(echo "$STATUS" | grep "Seconds_Behind_Master" | awk '{print $2}')
        
        if [ "$IO_RUNNING" != "Yes" ] || [ "$SQL_RUNNING" != "Yes" ]; then
            send_alert "Slave Replication Stopped" "从库 $SLAVE 复制已停止"
        elif [ "$DELAY" -gt 300 ]; then  # 5分钟延迟
            send_alert "Slave Replication Delay" "从库 $SLAVE 延迟 $DELAY 秒"
        fi
    done
}

send_alert() {
    SUBJECT="$1"
    MESSAGE="$2"
    
    # Send an email
    echo "$MESSAGE" | mail -s "[MHA Alert] $SUBJECT" $ALERT_EMAIL
    
    # Send a Slack message
    curl -X POST -H 'Content-type: application/json' \
         --data "{\"text\":\"*MHA alert*: $SUBJECT\n$MESSAGE\"}" \
         $SLACK_WEBHOOK
}

# Main monitoring loop
while true; do
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] Checking MHA status..."
    
    check_mha_status
    check_master_alive
    check_replication_health
    
    sleep 60  # check once a minute
done

5. MySQL InnoDB Cluster (MySQL 8.0+)

5.1 InnoDB Cluster Architecture

MySQL Shell
   ↓
MySQL Router → [read-write port] → Primary
      ↓                    ↙       ↘
[read-only port]     Secondary   Secondary
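
MySQL Router exposes separate ports for writes and reads, so applications pick a port rather than a specific server. A minimal sketch, assuming Router's bootstrap defaults for the classic protocol (6446 read-write, 6447 read-only), a locally running Router, and a placeholder app_user account:

# Writes: the read-write port always routes to the current primary
mysql -h 127.0.0.1 -P 6446 -u app_user -p -e "SELECT @@hostname, @@super_read_only"

# Reads: the read-only port load-balances across the secondaries
mysql -h 127.0.0.1 -P 6447 -u app_user -p -e "SELECT @@hostname, @@super_read_only"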

5.2 Cluster Deployment Steps

# 1. Install MySQL Shell
wget https://dev.mysql.com/get/mysql-shell-8.0.33-linux-glibc2.12-x86-64bit.tar.gz
tar -xzf mysql-shell-8.0.33-linux