MySQL高可用与扩展方案完全指南
一、MySQL高可用架构演进全景
演进路径:
单机MySQL → 主从复制 → 主从+读写分离 → MHA/MMM高可用 → InnoDB Cluster → 云数据库
二、主从复制(Replication)详解
2.1 复制原理与架构
-- 复制原理:基于二进制日志(binlog)
主库:写操作 → 二进制日志(binlog)
从库:IO线程拉取binlog → SQL线程重放 → 数据同步
2.2 复制配置实战
2.2.1 主库配置
# Primary (master) configuration: /etc/mysql/my.cnf or /etc/my.cnf
[mysqld]
# Server ID — must be unique within the replication cluster
server-id = 1
# Enable the binary log (required on the source for replication)
log_bin = /var/lib/mysql/mysql-bin.log
# Binary log format: ROW / MIXED / STATEMENT (ROW is safest for replication)
binlog_format = ROW
# Days to keep binary logs before automatic purge
# NOTE(review): expire_logs_days was removed in MySQL 8.0; use binlog_expire_logs_seconds there
expire_logs_days = 7
# Maximum size of one binary log file before rotation
max_binlog_size = 100M
# Databases to include in the binary log (optional; evaluated on the source)
binlog_do_db = order_db
# Databases to exclude from the binary log (optional)
binlog_ignore_db = test_db
# 0 = this server accepts writes (this is the primary; replicas set read_only = 1)
read_only = 0
# Enable GTID-based replication (recommended)
gtid_mode = ON
enforce_gtid_consistency = ON
2.2.2 创建复制用户
-- 在主库执行
-- 创建专门用于复制的用户
CREATE USER 'repl'@'%' IDENTIFIED BY 'ReplPassword123!';
GRANT REPLICATION SLAVE ON *.* TO 'repl'@'%';
FLUSH PRIVILEGES;
-- 查看主库状态
SHOW MASTER STATUS;
-- 输出:
-- +------------------+----------+--------------+------------------+-------------------+
-- | File | Position | Binlog_Do_DB | Binlog_Ignore_DB | Executed_Gtid_Set |
-- +------------------+----------+--------------+------------------+-------------------+
-- | mysql-bin.000003 | 107 | order_db | test_db | |
-- +------------------+----------+--------------+------------------+-------------------+
2.2.3 从库配置
# Replica (slave) configuration file
[mysqld]
server-id = 2 # must differ from the primary's server-id
log_bin = /var/lib/mysql/mysql-bin.log
relay_log = /var/lib/mysql/relay-bin.log
read_only = 1 # replica is read-only for regular accounts
super_read_only = 1 # read-only even for SUPER-privileged accounts
log_slave_updates = 1 # replica also writes applied events to its own binlog (needed for chained replication)
gtid_mode = ON
enforce_gtid_consistency = ON
2.2.4 配置从库连接主库
-- Run on the replica (slave)
-- Option 1: classic binlog-position-based replication
CHANGE MASTER TO
MASTER_HOST = '192.168.1.100',
MASTER_PORT = 3306,
MASTER_USER = 'repl',
MASTER_PASSWORD = 'ReplPassword123!',
MASTER_LOG_FILE = 'mysql-bin.000003',
MASTER_LOG_POS = 107;
-- Option 2: GTID-based replication (recommended)
CHANGE MASTER TO
MASTER_HOST = '192.168.1.100',
MASTER_PORT = 3306,
MASTER_USER = 'repl',
MASTER_PASSWORD = 'ReplPassword123!',
MASTER_AUTO_POSITION = 1; -- locate the start position automatically via GTIDs
-- Start replication
START SLAVE;
-- Check replication status
-- (\G already terminates the statement; a trailing ';' makes the mysql
--  client report "ERROR: No query specified")
SHOW SLAVE STATUS\G
-- Key fields to verify:
-- Slave_IO_Running: Yes
-- Slave_SQL_Running: Yes
-- Seconds_Behind_Master: 0 # replication lag in seconds (NULL while the SQL thread is stopped)
-- Last_IO_Error:
-- Last_SQL_Error:
-- List replicas registered with the primary (run on the primary)
SHOW SLAVE HOSTS;
2.3 复制模式对比
| 复制模式 | 优点 | 缺点 | 适用场景 |
| --- | --- | --- | --- |
| 异步复制 | 性能好,主库无延迟 | 可能丢失数据 | 大多数业务 |
| 半同步复制 | 数据更安全 | 性能略差 | 金融、交易类 |
| 组复制 | 强一致性,高可用 | 配置复杂 | 关键业务 |
2.3.1 半同步复制配置
-- Install the semi-sync plugins (master plugin on the source, slave plugin on replicas)
INSTALL PLUGIN rpl_semi_sync_master SONAME 'semisync_master.so';
INSTALL PLUGIN rpl_semi_sync_slave SONAME 'semisync_slave.so';
-- On the source
SET GLOBAL rpl_semi_sync_master_enabled = 1;
SET GLOBAL rpl_semi_sync_master_timeout = 1000; -- fall back to async after 1 s without a replica ack
-- On each replica
SET GLOBAL rpl_semi_sync_slave_enabled = 1;
-- Restart the IO thread so the replica re-registers as semi-sync
STOP SLAVE IO_THREAD;
START SLAVE IO_THREAD;
-- Verify configuration and runtime status
SHOW VARIABLES LIKE 'rpl_semi_sync%';
SHOW STATUS LIKE 'Rpl_semi_sync%';
2.4 监控与管理脚本
#!/bin/bash
# monitor_replication.sh — watch replication health on a replica, alert on
# failure/lag, and auto-skip a small set of known-benign replication errors.
#
# Requires a MySQL account with REPLICATION CLIENT (and SUPER for auto-repair).

MYSQL_USER="monitor"
MYSQL_PASS="MonitorPass123"
MYSQL_HOST="localhost"
ALERT_EMAIL="dba@company.com"

# Run a statement via the mysql client. The password is passed through the
# MYSQL_PWD environment variable instead of argv so it is not visible in `ps`.
run_mysql() {
  MYSQL_PWD="$MYSQL_PASS" mysql -u "$MYSQL_USER" -h "$MYSQL_HOST" -e "$1" 2>/dev/null
}

check_replication() {
  local status io_running sql_running delay last_error
  status=$(run_mysql "SHOW SLAVE STATUS\G")
  if [ -z "$status" ]; then
    echo "ERROR: Cannot connect to MySQL" >&2
    return 1
  fi
  # Anchored matches: plain `grep Last_Error` would also pick up
  # Last_IO_Error / Last_SQL_Error, and `grep Slave_SQL_Running` would also
  # match Slave_SQL_Running_State.
  io_running=$(echo "$status" | awk '/^ *Slave_IO_Running:/ {print $2}')
  sql_running=$(echo "$status" | awk '/^ *Slave_SQL_Running:/ {print $2}')
  delay=$(echo "$status" | awk '/^ *Seconds_Behind_Master:/ {print $2}')
  last_error=$(echo "$status" | awk -F': ' '/^ *Last_Error:/ {print $2}')

  if [ "$io_running" != "Yes" ] || [ "$sql_running" != "Yes" ]; then
    send_alert "Replication Stopped" "IO: $io_running, SQL: $sql_running, Error: $last_error"
    return 1
  fi
  # Seconds_Behind_Master is NULL while the SQL thread is stopped/catching up;
  # a non-numeric value must not reach the -gt test below.
  if ! [ "$delay" -ge 0 ] 2>/dev/null; then
    send_alert "Replication Delay Unknown" "Seconds_Behind_Master: $delay"
    return 2
  fi
  if [ "$delay" -gt 60 ]; then  # more than 60 s behind
    send_alert "Replication Delay High" "Delay: $delay seconds"
    return 2
  fi
  echo "OK: Replication running normally, Delay: $delay seconds"
  return 0
}

send_alert() {
  local subject="$1" message="$2"
  echo "ALERT: $subject - $message" | mail -s "[MySQL Alert] $subject" "$ALERT_EMAIL"
  # Optionally fan out to DingTalk / WeCom as well.
  curl -X POST -H "Content-Type: application/json" \
    -d "{\"msgtype\": \"text\", \"text\": {\"content\": \"MySQL告警: $subject\\n$message\"}}" \
    "https://oapi.dingtalk.com/robot/send?access_token=YOUR_TOKEN"
}

# Auto-skip well-known, usually-benign replication errors.
# WARNING: SQL_SLAVE_SKIP_COUNTER is ignored when gtid_mode=ON (which the
# configs above enable); with GTID replication you must instead inject an
# empty transaction for the offending GTID.
auto_repair_replication() {
  # Space-separated so the for-loop actually splits; the original used
  # "1062,1032" which produced one bogus token and invalid SQL.
  local skip_errors="1062 1032"  # duplicate key, row not found
  local error_code error_count
  for error_code in $skip_errors; do
    error_count=$(run_mysql \
      "SELECT COUNT(*) FROM performance_schema.replication_applier_status_by_worker
       WHERE LAST_ERROR_NUMBER = $error_code" | tail -1)
    # Guard against an empty result (query failed) before the numeric test.
    if [ -n "$error_count" ] && [ "$error_count" -gt 0 ] 2>/dev/null; then
      echo "Found error $error_code, skipping..."
      run_mysql "STOP SLAVE; SET GLOBAL SQL_SLAVE_SKIP_COUNTER = 1; START SLAVE;"
      sleep 2
    fi
  done
}

# Main loop: check every 30 s; on a hard failure (rc 1) try one repair pass
# and immediately re-check.
while true; do
  check_replication
  rv=$?
  if [ "$rv" -eq 1 ]; then
    auto_repair_replication
    sleep 10
    check_replication
  fi
  sleep 30
done
三、读写分离实现方案
3.1 应用层读写分离
3.1.1 Spring Boot + MyBatis配置
/**
 * Spring configuration wiring a master (write) and slave (read) data source
 * behind a routing DataSource, plus the transaction manager and MyBatis
 * session factory built on top of it.
 */
@Configuration
@MapperScan(basePackages = "com.example.mapper")
public class DataSourceConfig {
// JDBC URLs injected from application properties.
@Value("${spring.datasource.master.url}")
private String masterUrl;
@Value("${spring.datasource.slave.url}")
private String slaveUrl;
/**
 * Master (write) data source, bound from spring.datasource.master.*.
 */
@Bean(name = "masterDataSource")
@ConfigurationProperties(prefix = "spring.datasource.master")
public DataSource masterDataSource() {
return DataSourceBuilder.create().build();
}
/**
 * Slave (read) data source, bound from spring.datasource.slave.*.
 * NOTE(review): this bean is conditional on spring.datasource.slave.enabled,
 * yet dynamicDataSource() below calls slaveDataSource() unconditionally —
 * when the property is false the slave source is still constructed (or the
 * context fails). Confirm the intended behavior.
 */
@Bean(name = "slaveDataSource")
@ConfigurationProperties(prefix = "spring.datasource.slave")
@ConditionalOnProperty(name = "spring.datasource.slave.enabled", havingValue = "true")
public DataSource slaveDataSource() {
return DataSourceBuilder.create().build();
}
/**
 * Routing data source: selects MASTER or SLAVE per connection checkout
 * based on DataSourceContextHolder; defaults to the master.
 */
@Primary
@Bean(name = "dynamicDataSource")
public DataSource dynamicDataSource() {
Map<Object, Object> targetDataSources = new HashMap<>(2);
targetDataSources.put(DataSourceType.MASTER, masterDataSource());
targetDataSources.put(DataSourceType.SLAVE, slaveDataSource());
DynamicDataSource dynamicDataSource = new DynamicDataSource();
dynamicDataSource.setTargetDataSources(targetDataSources);
dynamicDataSource.setDefaultTargetDataSource(masterDataSource());
return dynamicDataSource;
}
/**
 * Transaction manager bound to the routing data source.
 */
@Bean
public PlatformTransactionManager transactionManager() {
return new DataSourceTransactionManager(dynamicDataSource());
}
/**
 * MyBatis session factory over the routing data source; mapper XML is
 * loaded from classpath:mapper/*.xml.
 */
@Bean
public SqlSessionFactory sqlSessionFactory() throws Exception {
SqlSessionFactoryBean sessionFactory = new SqlSessionFactoryBean();
sessionFactory.setDataSource(dynamicDataSource());
sessionFactory.setMapperLocations(
new PathMatchingResourcePatternResolver()
.getResources("classpath:mapper/*.xml"));
return sessionFactory.getObject();
}
}
3.1.2 数据源路由上下文
/**
 * Marker for which physical data source a statement should run against.
 */
public enum DataSourceType {
    /** Primary database — handles all writes. */
    MASTER,
    /** Replica database — serves read-only traffic. */
    SLAVE
}
/**
 * Thread-bound holder recording which data source the current thread should
 * use. Lookups fall back to MASTER when nothing has been set.
 */
public class DataSourceContextHolder {
    private static final ThreadLocal<DataSourceType> CONTEXT_HOLDER =
            new ThreadLocal<>();

    /** Bind the given data source type to the current thread. */
    public static void setDataSourceType(DataSourceType dataSourceType) {
        CONTEXT_HOLDER.set(dataSourceType);
    }

    /** Return the bound type, defaulting to MASTER when unset. */
    public static DataSourceType getDataSourceType() {
        DataSourceType bound = CONTEXT_HOLDER.get();
        return bound != null ? bound : DataSourceType.MASTER;
    }

    /** Remove the binding to avoid leaking state across pooled threads. */
    public static void clearDataSourceType() {
        CONTEXT_HOLDER.remove();
    }
}
/**
 * Routing DataSource: Spring calls determineCurrentLookupKey() on every
 * connection checkout and uses the returned key to pick the target source
 * registered in DataSourceConfig.dynamicDataSource().
 */
public class DynamicDataSource extends AbstractRoutingDataSource {
@Override
protected Object determineCurrentLookupKey() {
return DataSourceContextHolder.getDataSourceType();
}
}
3.1.3 自定义注解实现读写分离
/**
 * Marks a method (or type) whose queries should be routed to a read replica.
 */
@Target({ElementType.METHOD, ElementType.TYPE})
@Retention(RetentionPolicy.RUNTIME)
@Documented
public @interface ReadOnly {
/**
 * Replica name; useful when several read replicas exist.
 */
String value() default "";
/**
 * Force the replica even when a transaction is active.
 */
boolean force() default false;
}
/**
 * Marks a method (or type) that must be routed to the primary (write) source.
 */
@Target({ElementType.METHOD, ElementType.TYPE})
@Retention(RetentionPolicy.RUNTIME)
@Documented
public @interface WriteOnly {
}
/**
 * Aspect that switches the routing key in DataSourceContextHolder before a
 * repository/service method runs and clears it afterwards.
 *
 * NOTE(review): this class mixes two mechanisms — @ReadOnly/@WriteOnly
 * annotation advice and name-convention advice on every @Repository bean.
 * Without explicit @Order the relative ordering of these advices is
 * undefined, so they can overwrite each other's routing choice; confirm the
 * intended precedence.
 */
@Aspect
@Component
@Slf4j
public class DataSourceAspect {
@Around("@annotation(readOnly)")
public Object around(ProceedingJoinPoint joinPoint, ReadOnly readOnly) throws Throwable {
try {
// Route to a replica unless a transaction is active and force() is off
// (reads inside a transaction stay on the master to avoid lag issues).
if (readOnly.force() || !TransactionSynchronizationManager.isActualTransactionActive()) {
DataSourceContextHolder.setDataSourceType(DataSourceType.SLAVE);
log.debug("Switch to slave datasource for method: {}",
joinPoint.getSignature().getName());
}
return joinPoint.proceed();
} finally {
// Always clear so pooled threads don't leak the setting.
// NOTE(review): this also wipes a routing key set by an enclosing caller.
DataSourceContextHolder.clearDataSourceType();
}
}
@Around("@annotation(writeOnly)")
public Object around(ProceedingJoinPoint joinPoint, WriteOnly writeOnly) throws Throwable {
try {
// Explicitly pin the master for the annotated method.
DataSourceContextHolder.setDataSourceType(DataSourceType.MASTER);
log.debug("Switch to master datasource for method: {}",
joinPoint.getSignature().getName());
return joinPoint.proceed();
} finally {
DataSourceContextHolder.clearDataSourceType();
}
}
/**
 * Name-convention routing for @Repository beans: read-prefixed methods go
 * to a replica (outside transactions), write-prefixed methods to the master.
 */
@Before("@within(org.springframework.stereotype.Repository)")
public void before(JoinPoint joinPoint) {
String methodName = joinPoint.getSignature().getName();
// Read-style prefixes route to the replica
if (methodName.startsWith("find") ||
methodName.startsWith("select") ||
methodName.startsWith("query") ||
methodName.startsWith("get") ||
methodName.startsWith("list") ||
methodName.startsWith("count")) {
if (!TransactionSynchronizationManager.isActualTransactionActive()) {
DataSourceContextHolder.setDataSourceType(DataSourceType.SLAVE);
log.debug("Auto switch to slave for read method: {}", methodName);
}
}
// Write-style prefixes route to the master
else if (methodName.startsWith("insert") ||
methodName.startsWith("save") ||
methodName.startsWith("update") ||
methodName.startsWith("delete") ||
methodName.startsWith("remove")) {
DataSourceContextHolder.setDataSourceType(DataSourceType.MASTER);
log.debug("Auto switch to master for write method: {}", methodName);
}
}
@After("@within(org.springframework.stereotype.Repository)")
public void after(JoinPoint joinPoint) {
// Clean up the thread-local after every repository call.
DataSourceContextHolder.clearDataSourceType();
}
}
3.1.4 事务中的读写分离处理
/**
 * Transaction manager that pins the MASTER data source for the whole
 * transaction: the routing key is set before super.doBegin() checks out the
 * connection, so every statement in the transaction (reads included) hits
 * the primary and never sees replica lag.
 *
 * NOTE(review): DataSourceTransactionManager needs a DataSource, but as a
 * bare @Component nothing injects one here — and DataSourceConfig already
 * declares a transactionManager bean, so two PlatformTransactionManager
 * beans will coexist. Confirm how this class is meant to be wired.
 */
@Component
public class EnhancedTransactionManager extends DataSourceTransactionManager {
@Override
protected void doBegin(Object transaction, TransactionDefinition definition) {
// Force the master before the transactional connection is obtained.
DataSourceContextHolder.setDataSourceType(DataSourceType.MASTER);
super.doBegin(transaction, definition);
}
@Override
protected void doCleanupAfterCompletion(Object transaction) {
super.doCleanupAfterCompletion(transaction);
// Drop the thread-local routing key once the transaction finishes.
DataSourceContextHolder.clearDataSourceType();
}
}
/**
 * Service-layer examples of read/write splitting.
 */
@Service
@Slf4j
public class OrderService {
@Autowired
private OrderMapper orderMapper;
/**
 * Write path — @Transactional pins the master for the whole method.
 */
@Transactional
public Order createOrder(Order order) {
// Insert goes to the master
orderMapper.insert(order);
// Reads inside the transaction also hit the master (avoids replica lag)
Order savedOrder = orderMapper.selectById(order.getId());
// Further business logic...
processOrder(savedOrder);
return savedOrder;
}
/**
 * Read-only path — routed to a replica via @ReadOnly.
 */
@ReadOnly
public Order getOrderById(Long orderId) {
// Replica query
return orderMapper.selectById(orderId);
}
/**
 * Heavy query — force the replica even inside a transaction.
 */
@ReadOnly(force = true)
public List<Order> searchOrders(OrderQuery query) {
// Forced onto a replica regardless of transaction state
return orderMapper.search(query);
}
/**
 * Mixed flow: read first, then write.
 * NOTE(review): both calls below are self-invocations, which bypass the
 * Spring proxy — neither @ReadOnly nor @Transactional/@WriteOnly applies.
 * Calls must go through the proxy (self-injection or a separate bean).
 */
public void updateOrderStatus(Long orderId, String newStatus) {
// 1. Intended to read from a replica
Order order = getOrderById(orderId);
// 2. Intended to update on the master in a new transaction
updateOrder(order, newStatus);
}
// NOTE(review): Spring AOP ignores private methods — @Transactional and
// @WriteOnly have no effect here; the method must be public on a proxied bean.
@Transactional
@WriteOnly
private void updateOrder(Order order, String newStatus) {
order.setStatus(newStatus);
orderMapper.updateById(order);
}
}
3.2 中间件读写分离
3.2.1 使用ShardingSphere-JDBC
<!-- pom.xml -->
<dependency>
<groupId>org.apache.shardingsphere</groupId>
<artifactId>shardingsphere-jdbc-core</artifactId>
<version>5.3.2</version>
</dependency>
# application-sharding.yml
# NOTE: YAML nesting restored — the original snippet had its (semantically
# significant) indentation flattened and would not parse.
spring:
  shardingsphere:
    datasource:
      names: master,slave0,slave1
      master:
        type: com.zaxxer.hikari.HikariDataSource
        driver-class-name: com.mysql.cj.jdbc.Driver
        jdbc-url: jdbc:mysql://192.168.1.100:3306/order_db
        username: root
        password: MasterPass123
      slave0:
        type: com.zaxxer.hikari.HikariDataSource
        driver-class-name: com.mysql.cj.jdbc.Driver
        jdbc-url: jdbc:mysql://192.168.1.101:3306/order_db
        username: root
        password: SlavePass123
      slave1:
        type: com.zaxxer.hikari.HikariDataSource
        driver-class-name: com.mysql.cj.jdbc.Driver
        jdbc-url: jdbc:mysql://192.168.1.102:3306/order_db
        username: root
        password: SlavePass123
    rules:
      readwrite-splitting:
        data-sources:
          readwrite_ds:
            type: Static
            props:
              write-data-source-name: master
              read-data-source-names: slave0,slave1
            load-balancer-name: round_robin
        load-balancers:
          round_robin:
            type: ROUND_ROBIN
      # Optional: SQL hint algorithm to force routing to the primary
      hint:
        default-database-strategy:
          hint-algorithm:
            type: CLASS_BASED
            props:
              algorithmClassName: com.example.config.ForceMasterHintAlgorithm
    props:
      sql-show: true  # log the actual SQL statements
3.2.2 自定义Hint算法
/**
 * Hint algorithm that routes every hinted statement to data sources whose
 * name starts with "master", i.e. forces the write source.
 */
public class ForceMasterHintAlgorithm implements HintShardingAlgorithm<String> {
    @Override
    public Collection<String> doSharding(
            Collection<String> availableTargetNames,
            HintShardingValue<String> shardingValue) {
        // Keep only the master data source(s).
        List<String> masters = new ArrayList<>();
        for (String candidate : availableTargetNames) {
            if (candidate.startsWith("master")) {
                masters.add(candidate);
            }
        }
        return masters;
    }

    @Override
    public void init() {
        // No initialization required.
    }

    @Override
    public String getType() {
        return "HINT";
    }
}
// Using a Hint to force specific operations onto the primary
public class OrderService {
public void forceMasterOperation() {
// try-with-resources: closing the HintManager restores default routing
try (HintManager hintManager = HintManager.getInstance()) {
// Route all subsequent SQL in this scope to the write data source
hintManager.setWriteRouteOnly();
// Run the operations that must execute on the primary
orderMapper.updateCriticalData();
} // HintManager auto-closed here: default routing is restored
}
}
3.3 读写分离的注意事项
3.3.1 主从延迟问题
/**
 * Helpers for coping with master-to-replica replication lag.
 */
@Component
public class MasterSlaveDelayHandler {
@Autowired
private DataSource dataSource;
/**
 * Return true when the replica's lag is within maxDelaySeconds.
 * NOTE(review): Seconds_Behind_Master is NULL while the SQL thread is
 * stopped; ResultSet.getInt() then returns 0, which wrongly reads as
 * "no lag" — check rs.wasNull(). Statement/ResultSet are also never
 * closed (only the Connection is in try-with-resources).
 */
public boolean checkDelay(Long maxDelaySeconds) {
try (Connection conn = dataSource.getConnection()) {
Statement stmt = conn.createStatement();
ResultSet rs = stmt.executeQuery("SHOW SLAVE STATUS");
if (rs.next()) {
int delay = rs.getInt("Seconds_Behind_Master");
return delay <= maxDelaySeconds;
}
} catch (SQLException e) {
log.error("Check replication delay failed", e);
}
return false;
}
/**
 * Read-your-own-writes workaround: shortly after a write, read from the
 * master instead of a possibly-stale replica.
 * NOTE(review): a non-static @Service class nested inside another bean
 * cannot be instantiated by Spring — it should be a top-level class. The
 * lastWriteTime ThreadLocal is also never removed, so stale timestamps
 * leak across pooled threads.
 */
@Service
public class OrderServiceWithDelayHandler {
@Autowired
private MasterSlaveDelayHandler delayHandler;
private final ThreadLocal<Long> lastWriteTime = new ThreadLocal<>();
@Transactional
public Order createOrderAndRead(Order order) {
// Write on the master
orderMapper.insert(order);
// Remember when this thread last wrote
lastWriteTime.set(System.currentTimeMillis());
// Immediate read-back; within the transaction this stays on the master anyway
return readAfterWrite(order.getId());
}
public Order readAfterWrite(Long orderId) {
Long writeTime = lastWriteTime.get();
if (writeTime != null) {
long elapsed = System.currentTimeMillis() - writeTime;
// Within the write-recency window, pin the read to the master
if (elapsed < 5000) { // 5-second window
try {
DataSourceContextHolder.setDataSourceType(DataSourceType.MASTER);
return orderMapper.selectById(orderId);
} finally {
DataSourceContextHolder.clearDataSourceType();
}
}
}
// Otherwise a replica read is acceptable
return orderMapper.selectById(orderId);
}
}
}
3.3.2 多个从库的负载均衡
/**
 * Client-side load-balancing strategies for choosing a read replica.
 */
@Component
public class SlaveLoadBalancer {
    private final List<String> slaveNames = Arrays.asList("slave0", "slave1", "slave2");
    private final AtomicInteger counter = new AtomicInteger(0);
    // Static weights (bigger value => proportionally more traffic); built once
    // instead of being re-created on every getSlaveByWeight() call.
    private static final Map<String, Integer> WEIGHTS = new HashMap<>();
    static {
        WEIGHTS.put("slave0", 3); // high-spec server
        WEIGHTS.put("slave1", 2);
        WEIGHTS.put("slave2", 1); // low-spec server
    }

    // 1. Round robin.
    public String getSlaveByRoundRobin() {
        // Math.floorMod keeps the index non-negative after the int counter
        // overflows; `getAndIncrement() % size` turns negative there and
        // would throw ArrayIndexOutOfBoundsException.
        int index = Math.floorMod(counter.getAndIncrement(), slaveNames.size());
        return slaveNames.get(index);
    }

    // 2. Uniform random.
    public String getSlaveByRandom() {
        Random random = new Random();
        return slaveNames.get(random.nextInt(slaveNames.size()));
    }

    // 3. Weighted random: pick proportionally to configured weight.
    public String getSlaveByWeight() {
        int totalWeight = WEIGHTS.values().stream().mapToInt(Integer::intValue).sum();
        int random = new Random().nextInt(totalWeight);
        for (Map.Entry<String, Integer> entry : WEIGHTS.entrySet()) {
            random -= entry.getValue();
            if (random < 0) {
                return entry.getKey();
            }
        }
        return slaveNames.get(0); // unreachable fallback
    }

    // 4. Health-check based: pick a random replica that answered SELECT 1.
    public String getHealthySlave() {
        Map<String, Boolean> healthStatus = checkSlaveHealth();
        List<String> healthySlaves = healthStatus.entrySet().stream()
                .filter(Map.Entry::getValue)
                .map(Map.Entry::getKey)
                .collect(Collectors.toList());
        if (healthySlaves.isEmpty()) {
            throw new RuntimeException("No healthy slave available");
        }
        return healthySlaves.get(new Random().nextInt(healthySlaves.size()));
    }

    // Probe each replica; any connection/query failure marks it unhealthy.
    private Map<String, Boolean> checkSlaveHealth() {
        Map<String, Boolean> healthStatus = new HashMap<>();
        for (String slave : slaveNames) {
            // NOTE(review): getConnection(String) is not defined in this class —
            // presumably provided elsewhere; confirm. Statement/ResultSet are now
            // closed via try-with-resources (the original leaked them).
            try (Connection conn = getConnection(slave);
                 Statement stmt = conn.createStatement();
                 ResultSet rs = stmt.executeQuery("SELECT 1")) {
                healthStatus.put(slave, rs.next());
            } catch (SQLException e) {
                healthStatus.put(slave, false);
                log.warn("Slave {} is unhealthy: {}", slave, e.getMessage());
            }
        }
        return healthStatus;
    }
}
四、高可用方案:MHA(Master High Availability)
4.1 MHA架构
应用层
↓
VIP (Virtual IP) → [Master]
↓ (复制)
[Slave1] ← MHA Manager监控和切换
↓ (复制)
[Slave2]
4.2 MHA安装配置
#!/bin/bash
# install_mha.sh — install MHA node/manager packages and set up SSH trust.
set -euo pipefail

MHA_VER="0.58"

# 1. Perl dependencies required by MHA
yum install -y perl-DBD-MySQL perl-Config-Tiny \
  perl-Log-Dispatch perl-Parallel-ForkManager \
  perl-Time-HiRes

# 2. MHA Node — must be installed on every node (manager host included)
wget "https://github.com/yoshinorim/mha4mysql-node/releases/download/v${MHA_VER}/mha4mysql-node-${MHA_VER}-0.el7.centos.noarch.rpm"
rpm -ivh "mha4mysql-node-${MHA_VER}-0.el7.centos.noarch.rpm"

# 3. MHA Manager — only on the management node
wget "https://github.com/yoshinorim/mha4mysql-manager/releases/download/v${MHA_VER}/mha4mysql-manager-${MHA_VER}-0.el7.centos.noarch.rpm"
rpm -ivh "mha4mysql-manager-${MHA_VER}-0.el7.centos.noarch.rpm"

# 4. Passwordless SSH between all nodes.
# -N "" and -f make key generation non-interactive (the original prompted
# and blocked automation); skip generation if a key already exists.
[ -f ~/.ssh/id_rsa ] || ssh-keygen -t rsa -N "" -f ~/.ssh/id_rsa
for host in master slave1 slave2; do
  ssh-copy-id -i ~/.ssh/id_rsa.pub "root@${host}"
done
4.3 MHA配置文件
# /etc/mha/app1.cnf
# NOTE: MHA's config parser (Config::Tiny) does NOT strip trailing comments,
# so "key=value # comment" would be read as an invalid value. All comments
# in this file therefore live on their own lines.
[server default]
# MySQL admin account used by MHA
user=mha_user
password=MhaPass123
ssh_user=root
repl_user=repl
repl_password=ReplPassword123
# Working directories and manager log
manager_workdir=/var/log/mha/app1
manager_log=/var/log/mha/app1/manager.log
remote_workdir=/var/log/mha/app1
# Failover behaviour
# Heartbeat interval in seconds
ping_interval=3
# Heartbeat probe type
ping_type=SELECT
master_ip_failover_script=/usr/local/bin/master_ip_failover
master_ip_online_change_script=/usr/local/bin/master_ip_online_change
report_script=/usr/local/bin/send_report
secondary_check_script=masterha_secondary_check -s slave1 -s slave2
# Replication settings
# Prefer hosts marked candidate_master=1 when electing a new master
candidate_master=1
# Do not rule out candidates because of replication lag
check_repl_delay=0
shutdown_script=""
ignore_fail_on_start=0

[server1]
hostname=192.168.1.100
port=3306
candidate_master=1

[server2]
hostname=192.168.1.101
port=3306
candidate_master=1

[server3]
hostname=192.168.1.102
port=3306
# Never promote this host to master
no_master=1
4.4 故障切换脚本
#!/usr/bin/env perl
# master_ip_failover — MHA callout that moves a virtual IP to the new master.
#
# MHA invokes this script with *named* options, e.g.
#   master_ip_failover --command=start --orig_master_host=... --new_master_host=...
# The original version read positional @ARGV and used an undeclared $command,
# which fails to compile under `use strict`.
use strict;
use warnings;
use Getopt::Long;
use MHA::DBHelper;

my $vip       = '192.168.1.200/24';
my $interface = 'eth0';
my $ssh_user  = "root";

# Parse the options MHA passes; unknown extra options (ports, users, ssh
# settings) are accepted and ignored via pass_through.
my ( $command, $orig_master_host, $new_master_host );
Getopt::Long::Configure('pass_through');
GetOptions(
    'command=s'          => \$command,
    'orig_master_host=s' => \$orig_master_host,
    'new_master_host=s'  => \$new_master_host,
) or die "Failed to parse options\n";

# Dispatch on the MHA phase.
sub main {
    die "Missing --command\n" unless defined $command;
    if ( $command eq "stop" || $command eq "stopssh" ) {
        # Old master failed (or is being demoted): take the VIP away from it.
        remove_vip($orig_master_host);
    }
    elsif ( $command eq "start" ) {
        # New master promoted: attach the VIP to it.
        add_vip($new_master_host);
    }
    elsif ( $command eq "status" ) {
        # Health check of the VIP placement.
        check_vip();
    }
}

# Attach the VIP on the given host, then broadcast a gratuitous ARP so
# switches and neighbors learn the new location immediately.
sub add_vip {
    my $host = shift;
    print "Adding VIP $vip to $host\n";
    my $cmd = "ssh $ssh_user\@$host \" /sbin/ip addr add $vip dev $interface \"";
    system($cmd);
    $cmd = "ssh $ssh_user\@$host \" /sbin/arping -c 3 -A -I $interface $vip \"";
    system($cmd);
    print "VIP added successfully\n";
}

# Detach the VIP from the given host.
sub remove_vip {
    my $host = shift;
    print "Removing VIP $vip from $host\n";
    my $cmd = "ssh $ssh_user\@$host \" /sbin/ip addr del $vip dev $interface \"";
    system($cmd);
    print "VIP removed successfully\n";
}

# Report the VIP for --command=status. The original called check_vip()
# without defining it, which died at runtime.
sub check_vip {
    print "VIP is $vip on interface $interface\n";
}

main();
exit 0;
4.5 MHA管理命令
# 1. Verify SSH connectivity between all nodes
masterha_check_ssh --conf=/etc/mha/app1.cnf
# 2. Verify the replication topology and settings
masterha_check_repl --conf=/etc/mha/app1.cnf
# 3. Start the MHA manager in the background
nohup masterha_manager --conf=/etc/mha/app1.cnf \
> /var/log/mha/app1/manager.log 2>&1 &
# 4. Query the manager's status
masterha_check_status --conf=/etc/mha/app1.cnf
# 5. Stop the MHA manager
masterha_stop --conf=/etc/mha/app1.cnf
# 6. Manual (online) switchover to a new master
masterha_master_switch --conf=/etc/mha/app1.cnf \
--master_state=alive \
--new_master_host=192.168.1.101 \
--orig_master_is_new_slave \
--interactive=0
# 7. Follow the manager log
tail -f /var/log/mha/app1/manager.log
4.6 MHA监控脚本
#!/bin/bash
# monitor_mha.sh — watch the MHA manager, the master, and replica replication.
set -u

CONFIG_FILE="/etc/mha/app1.cnf"
LOG_FILE="/var/log/mha/app1/manager.log"
ALERT_EMAIL="dba@company.com"
SLACK_WEBHOOK="https://hooks.slack.com/services/xxx"
MYSQL_USER="mha_user"
MYSQL_PASS="MhaPass123"

# Run mysql against a host; the password goes via MYSQL_PWD so it never
# appears in `ps` output (the original passed it on the command line).
mysql_on() {
  local host=$1; shift
  MYSQL_PWD="$MYSQL_PASS" mysql -h "$host" -u "$MYSQL_USER" "$@"
}

# Hostname of [server1] (the master) is the line right after the section header.
master_hostname() {
  grep -A1 "\[server1\]" "$CONFIG_FILE" | grep "hostname" | cut -d= -f2
}

check_mha_status() {
  local status
  status=$(masterha_check_status --conf="$CONFIG_FILE" 2>&1)
  if echo "$status" | grep -q "stopped"; then
    send_alert "MHA Manager Stopped" "MHA监控已停止,需要手动重启"
    return 1
  elif echo "$status" | grep -q "running"; then
    echo "MHA is running normally"
    return 0
  else
    send_alert "MHA Status Unknown" "无法获取MHA状态: $status"
    return 2
  fi
}

check_master_alive() {
  local master_host
  master_host=$(master_hostname)
  if mysql_on "$master_host" -e "SELECT 1" &>/dev/null; then
    echo "Master is alive"
    return 0
  else
    send_alert "Master Database Down" "主库 $master_host 无法连接"
    return 1
  fi
}

check_replication_health() {
  local master_host slaves slave status io_running sql_running delay
  master_host=$(master_hostname)
  # All hostnames except the master's. The original filtered with
  # `grep -v "server1"`, which never matches a hostname= line, so the master
  # itself was wrongly checked as a slave.
  slaves=$(grep "hostname" "$CONFIG_FILE" | cut -d= -f2 | grep -v "^${master_host}$")
  for slave in $slaves; do
    status=$(mysql_on "$slave" -e "SHOW SLAVE STATUS\G" 2>/dev/null)
    if [ -z "$status" ]; then
      send_alert "Slave Unreachable" "从库 $slave 无法连接"
      continue
    fi
    # Anchored on the colon so Slave_SQL_Running_State is not also matched.
    io_running=$(echo "$status" | awk '/Slave_IO_Running:/ {print $2}')
    sql_running=$(echo "$status" | awk '/Slave_SQL_Running:/ {print $2}')
    delay=$(echo "$status" | awk '/Seconds_Behind_Master:/ {print $2}')
    if [ "$io_running" != "Yes" ] || [ "$sql_running" != "Yes" ]; then
      send_alert "Slave Replication Stopped" "从库 $slave 复制已停止"
    # Seconds_Behind_Master is NULL while the SQL thread is down; compare
    # numerically only when we actually got a number.
    elif [ "$delay" != "NULL" ] && [ "$delay" -gt 300 ] 2>/dev/null; then  # 5-minute threshold
      send_alert "Slave Replication Delay" "从库 $slave 延迟 $delay 秒"
    fi
  done
}

send_alert() {
  local subject="$1" message="$2"
  # Email
  echo "$message" | mail -s "[MHA Alert] $subject" "$ALERT_EMAIL"
  # Slack webhook
  curl -X POST -H 'Content-type: application/json' \
    --data "{\"text\":\"*MHA告警*: $subject\n$message\"}" \
    "$SLACK_WEBHOOK"
}

# Main loop: run all three checks once a minute.
while true; do
  echo "[$(date '+%Y-%m-%d %H:%M:%S')] Checking MHA status..."
  check_mha_status
  check_master_alive
  check_replication_health
  sleep 60
done
五、MySQL InnoDB Cluster(MySQL 8.0+)
5.1 InnoDB Cluster架构
MySQL Shell
↓
MySQL Router → [读写端口] → Primary (主节点)
↓ ↙ ↘
[只读端口] Secondary Secondary
(从节点) (从节点)
5.2 集群部署步骤
# 1. 安装MySQL Shell
wget https://dev.mysql.com/get/mysql-shell-8.0.33-linux-glibc2.12-x86-64bit.tar.gz
tar -xzf mysql-shell-8.0.33-linux