MySQL主从复制配置实战:高可用架构搭建指南
Orion K Lv6

MySQL主从复制是实现数据库高可用性和读写分离的核心技术。本文将详细介绍如何配置和管理MySQL主从复制环境,包括故障处理和性能优化。

主从复制原理

1. 复制工作机制

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
-- 复制涉及的关键组件
-- 1. 主服务器的二进制日志 (Binary Log)
-- 2. 从服务器的中继日志 (Relay Log)
-- 3. 从服务器的IO线程和SQL线程

-- 查看主服务器状态
SHOW MASTER STATUS;

-- 查看从服务器状态
SHOW SLAVE STATUS\G

-- 查看复制相关的系统变量
SHOW VARIABLES LIKE '%log_bin%';
SHOW VARIABLES LIKE '%server_id%';
SHOW VARIABLES LIKE '%relay%';

-- 复制流程说明:
-- 1. 主服务器将数据变更记录到二进制日志
-- 2. 从服务器的IO线程连接主服务器,读取二进制日志
-- 3. IO线程将读取的日志写入本地的中继日志
-- 4. SQL线程读取中继日志并执行其中的SQL语句

2. 复制模式对比

1
2
3
4
5
6
7
8
9
10
11
12
13
-- 查看当前复制模式
SELECT @@binlog_format;

-- 三种复制模式:
-- 1. STATEMENT: 基于语句的复制
-- 2. ROW: 基于行的复制 (推荐)
-- 3. MIXED: 混合模式

-- 设置复制模式
SET GLOBAL binlog_format = 'ROW';

-- 在 my.cnf 中永久配置
-- binlog_format = ROW

主从复制配置

1. 主服务器配置

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# 主服务器 my.cnf 配置
[mysqld]
# 服务器ID (必须唯一)
server-id = 1

# 启用二进制日志
log-bin = mysql-bin
binlog_format = ROW

# 二进制日志过期时间 (天)
expire_logs_days = 7

# 同步二进制日志到磁盘
sync_binlog = 1

# InnoDB 事务日志刷新
innodb_flush_log_at_trx_commit = 1

# 复制过滤 (可选)
# binlog-do-db = database1
# binlog-ignore-db = mysql
# binlog-ignore-db = information_schema
# binlog-ignore-db = performance_schema

# 半同步复制 (可选)
# plugin-load = "rpl_semi_sync_master=semisync_master.so"
# rpl_semi_sync_master_enabled = 1
# rpl_semi_sync_master_timeout = 1000
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
-- 主服务器SQL配置
-- 1. 创建复制用户
CREATE USER 'repl_user'@'%' IDENTIFIED BY 'strong_password';
GRANT REPLICATION SLAVE ON *.* TO 'repl_user'@'%';
FLUSH PRIVILEGES;

-- 2. 锁定表并获取二进制日志位置
FLUSH TABLES WITH READ LOCK;
SHOW MASTER STATUS;
-- 记录 File 和 Position 值

-- 3. 备份数据 (在另一个终端执行)
-- mysqldump -u root -p --all-databases --master-data=2 > master_backup.sql

-- 4. 解锁表
UNLOCK TABLES;

2. 从服务器配置

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# 从服务器 my.cnf 配置
[mysqld]
# 服务器ID (必须唯一,不同于主服务器)
server-id = 2

# 启用中继日志
relay-log = relay-bin

# 从服务器只读 (可选)
read_only = 1
super_read_only = 1

# 跳过某些错误 (谨慎使用)
# slave-skip-errors = 1062,1032

# 复制过滤 (可选)
# replicate-do-db = database1
# replicate-ignore-db = mysql
# replicate-ignore-table = database1.temp_table

# 半同步复制 (可选)
# plugin-load = "rpl_semi_sync_slave=semisync_slave.so"
# rpl_semi_sync_slave_enabled = 1

# 并行复制 (MySQL 5.7+)
slave_parallel_type = LOGICAL_CLOCK
slave_parallel_workers = 4
slave_preserve_commit_order = 1

# GTID 复制 (MySQL 5.6+)
gtid_mode = ON
enforce_gtid_consistency = ON
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
-- 从服务器SQL配置
-- 1. 恢复主服务器数据
-- mysql -u root -p < master_backup.sql

-- 2. 配置主从复制
CHANGE MASTER TO
MASTER_HOST='192.168.1.100',
MASTER_USER='repl_user',
MASTER_PASSWORD='strong_password',
MASTER_LOG_FILE='mysql-bin.000001',
MASTER_LOG_POS=154;

-- 3. 启动从服务器
START SLAVE;

-- 4. 检查复制状态
SHOW SLAVE STATUS\G

-- 关键状态字段说明:
-- Slave_IO_Running: Yes (IO线程运行状态)
-- Slave_SQL_Running: Yes (SQL线程运行状态)
-- Seconds_Behind_Master: 0 (复制延迟秒数)
-- Last_IO_Error: (IO错误信息)
-- Last_SQL_Error: (SQL错误信息)

GTID 复制配置

1. GTID 基础概念

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
-- GTID (Global Transaction Identifier) 全局事务标识符
-- 格式: server_uuid:transaction_id
-- 例如: 3E11FA47-71CA-11E1-9E33-C80AA9429562:23

-- 查看 GTID 相关变量
SHOW VARIABLES LIKE '%gtid%';

-- 查看已执行的 GTID 集合
SELECT @@gtid_executed;

-- 查看已清除的 GTID 集合
SELECT @@gtid_purged;

-- 查看服务器 UUID
SELECT @@server_uuid;

2. GTID 复制配置

1
2
3
4
5
6
7
8
9
10
11
12
13
14
# 主从服务器都需要的 GTID 配置
[mysqld]
# 启用 GTID
gtid_mode = ON
enforce_gtid_consistency = ON

# 二进制日志和复制相关
log_bin = mysql-bin
log_slave_updates = 1
binlog_format = ROW

# 服务器ID
server_id = 1 # 主服务器
# server_id = 2 # 从服务器
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
-- GTID 复制设置 (从服务器)
-- 1. 停止传统复制 (如果已配置)
STOP SLAVE;
RESET SLAVE ALL;

-- 2. 配置基于 GTID 的复制
CHANGE MASTER TO
MASTER_HOST='192.168.1.100',
MASTER_USER='repl_user',
MASTER_PASSWORD='strong_password',
MASTER_AUTO_POSITION=1;

-- 3. 启动复制
START SLAVE;

-- 4. 检查状态
SHOW SLAVE STATUS\G
-- 注意: Auto_Position 应该为 1
-- Retrieved_Gtid_Set 和 Executed_Gtid_Set 显示 GTID 信息

多从服务器配置

1. 一主多从架构

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#!/bin/bash
# 一主多从配置脚本

MASTER_HOST="192.168.1.100"
REPL_USER="repl_user"
REPL_PASS="strong_password"

# 从服务器列表
SLAVES=("192.168.1.101" "192.168.1.102" "192.168.1.103")

# 配置每个从服务器
for i in "${!SLAVES[@]}"; do
SLAVE_HOST="${SLAVES[$i]}"
SERVER_ID=$((i + 2)) # 从2开始编号

echo "配置从服务器: $SLAVE_HOST (server-id: $SERVER_ID)"

# 连接到从服务器并配置复制
mysql -h $SLAVE_HOST -u root -p << EOF
-- 设置服务器ID
SET GLOBAL server_id = $SERVER_ID;

-- 配置复制
CHANGE MASTER TO
MASTER_HOST='$MASTER_HOST',
MASTER_USER='$REPL_USER',
MASTER_PASSWORD='$REPL_PASS',
MASTER_AUTO_POSITION=1;

-- 启动复制
START SLAVE;

-- 检查状态
SHOW SLAVE STATUS\G
EOF

done

2. 级联复制配置

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
-- 级联复制: Master -> Slave1 -> Slave2

-- Slave1 配置 (既是从服务器,也是主服务器)
-- my.cnf 配置
-- server-id = 2
-- log-bin = mysql-bin
-- log-slave-updates = 1 # 重要:记录从主服务器复制的更新

-- Slave1 作为 Slave2 的主服务器
-- 在 Slave1 上创建复制用户
CREATE USER 'repl_user2'@'%' IDENTIFIED BY 'strong_password2';
GRANT REPLICATION SLAVE ON *.* TO 'repl_user2'@'%';

-- Slave2 配置
CHANGE MASTER TO
MASTER_HOST='slave1_ip',
MASTER_USER='repl_user2',
MASTER_PASSWORD='strong_password2',
MASTER_AUTO_POSITION=1;

START SLAVE;

复制监控和维护

1. 复制状态监控

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
-- 创建复制监控视图
CREATE VIEW replication_status AS
SELECT
'Master' as server_role,
@@server_id as server_id,
@@hostname as hostname,
NULL as master_host,
NULL as io_running,
NULL as sql_running,
NULL as seconds_behind_master,
NULL as last_error
UNION ALL
SELECT
'Slave' as server_role,
@@server_id as server_id,
@@hostname as hostname,
SUBSTRING_INDEX(SUBSTRING_INDEX(
(SELECT VARIABLE_VALUE FROM performance_schema.global_status
WHERE VARIABLE_NAME = 'Slave_running'), ',', 1), ':', -1) as master_host,
(SELECT VARIABLE_VALUE FROM performance_schema.global_status
WHERE VARIABLE_NAME = 'Slave_IO_Running') as io_running,
(SELECT VARIABLE_VALUE FROM performance_schema.global_status
WHERE VARIABLE_NAME = 'Slave_SQL_Running') as sql_running,
(SELECT VARIABLE_VALUE FROM performance_schema.global_status
WHERE VARIABLE_NAME = 'Seconds_Behind_Master') as seconds_behind_master,
COALESCE(
(SELECT VARIABLE_VALUE FROM performance_schema.global_status
WHERE VARIABLE_NAME = 'Last_IO_Error'),
(SELECT VARIABLE_VALUE FROM performance_schema.global_status
WHERE VARIABLE_NAME = 'Last_SQL_Error')
) as last_error;

-- 使用监控视图
SELECT * FROM replication_status;

-- 详细的复制延迟监控
SELECT
CHANNEL_NAME,
SERVICE_STATE,
LAST_ERROR_MESSAGE,
LAST_ERROR_TIMESTAMP
FROM performance_schema.replication_connection_status;

SELECT
CHANNEL_NAME,
SERVICE_STATE,
COUNT_TRANSACTIONS_RETRIES,
LAST_ERROR_MESSAGE,
LAST_ERROR_TIMESTAMP
FROM performance_schema.replication_applier_status;

2. 复制监控脚本

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
#!/usr/bin/env python3
import pymysql
import time
import json
from datetime import datetime

class ReplicationMonitor:
def __init__(self, servers_config):
self.servers = servers_config

def check_replication_status(self):
"""检查所有服务器的复制状态"""
results = {}

for server_name, config in self.servers.items():
try:
conn = pymysql.connect(**config)
with conn.cursor(pymysql.cursors.DictCursor) as cursor:
# 检查是否为主服务器
cursor.execute("SHOW MASTER STATUS")
master_status = cursor.fetchone()

# 检查是否为从服务器
cursor.execute("SHOW SLAVE STATUS")
slave_status = cursor.fetchone()

results[server_name] = {
'timestamp': datetime.now().isoformat(),
'is_master': master_status is not None,
'is_slave': slave_status is not None,
'master_status': master_status,
'slave_status': slave_status
}

conn.close()

except Exception as e:
results[server_name] = {
'timestamp': datetime.now().isoformat(),
'error': str(e)
}

return results

def check_replication_lag(self):
"""检查复制延迟"""
lag_info = {}

for server_name, config in self.servers.items():
try:
conn = pymysql.connect(**config)
with conn.cursor(pymysql.cursors.DictCursor) as cursor:
cursor.execute("SHOW SLAVE STATUS")
slave_status = cursor.fetchone()

if slave_status:
lag_info[server_name] = {
'seconds_behind_master': slave_status.get('Seconds_Behind_Master'),
'io_running': slave_status.get('Slave_IO_Running'),
'sql_running': slave_status.get('Slave_SQL_Running'),
'last_io_error': slave_status.get('Last_IO_Error'),
'last_sql_error': slave_status.get('Last_SQL_Error')
}

conn.close()

except Exception as e:
lag_info[server_name] = {'error': str(e)}

return lag_info

def send_alert(self, message):
"""发送告警"""
print(f"ALERT: {message}")
# 这里可以集成邮件、短信或其他告警方式

def monitor_loop(self, interval=60):
"""监控循环"""
while True:
try:
print(f"\n=== 复制监控检查 {datetime.now()} ===")

# 检查复制状态
status = self.check_replication_status()

# 检查复制延迟
lag_info = self.check_replication_lag()

# 分析结果并发送告警
for server_name, info in lag_info.items():
if 'error' in info:
self.send_alert(f"服务器 {server_name} 连接失败: {info['error']}")
continue

# 检查复制线程状态
if info.get('io_running') != 'Yes':
self.send_alert(f"服务器 {server_name} IO线程未运行")

if info.get('sql_running') != 'Yes':
self.send_alert(f"服务器 {server_name} SQL线程未运行")

# 检查复制延迟
lag = info.get('seconds_behind_master')
if lag is not None and lag > 300: # 5分钟
self.send_alert(f"服务器 {server_name} 复制延迟过高: {lag}秒")

# 检查错误信息
if info.get('last_io_error'):
self.send_alert(f"服务器 {server_name} IO错误: {info['last_io_error']}")

if info.get('last_sql_error'):
self.send_alert(f"服务器 {server_name} SQL错误: {info['last_sql_error']}")

# 输出状态摘要
for server_name, info in status.items():
if 'error' not in info:
role = []
if info['is_master']:
role.append('Master')
if info['is_slave']:
role.append('Slave')

print(f"{server_name}: {'/'.join(role) if role else 'Standalone'}")

time.sleep(interval)

except KeyboardInterrupt:
print("\n监控已停止")
break
except Exception as e:
print(f"监控错误: {e}")
time.sleep(interval)

# 使用示例
if __name__ == "__main__":
servers_config = {
'master': {
'host': '192.168.1.100',
'user': 'monitor_user',
'password': 'monitor_pass',
'charset': 'utf8mb4'
},
'slave1': {
'host': '192.168.1.101',
'user': 'monitor_user',
'password': 'monitor_pass',
'charset': 'utf8mb4'
},
'slave2': {
'host': '192.168.1.102',
'user': 'monitor_user',
'password': 'monitor_pass',
'charset': 'utf8mb4'
}
}

monitor = ReplicationMonitor(servers_config)
monitor.monitor_loop(60) # 每分钟检查一次

故障处理和恢复

1. 常见复制问题

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
-- 1. 复制中断处理
-- 查看错误信息
SHOW SLAVE STATUS\G

-- 跳过错误 (谨慎使用)
STOP SLAVE;
SET GLOBAL sql_slave_skip_counter = 1;
START SLAVE;

-- 2. 复制延迟处理
-- 查看延迟原因
SELECT * FROM performance_schema.events_statements_current
WHERE THREAD_ID IN (
SELECT THREAD_ID FROM performance_schema.threads
WHERE NAME = 'thread/sql/slave_sql'
);

-- 优化并行复制
SET GLOBAL slave_parallel_workers = 8;
SET GLOBAL slave_parallel_type = 'LOGICAL_CLOCK';

-- 3. 主键冲突处理
-- 查看冲突详情
SHOW SLAVE STATUS\G
-- 查看 Last_SQL_Error 字段

-- 解决方案1: 跳过冲突
STOP SLAVE;
SET GLOBAL sql_slave_skip_counter = 1;
START SLAVE;

-- 解决方案2: 手动解决冲突
-- 在从服务器上删除冲突行,然后重启复制

-- 4. GTID 复制问题
-- 查看 GTID 状态
SELECT @@gtid_executed;
SELECT @@gtid_purged;

-- 重置 GTID (危险操作)
STOP SLAVE;
RESET MASTER;
RESET SLAVE;
SET GLOBAL gtid_purged = 'uuid:1-100';
START SLAVE;

2. 主从切换

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/bin/bash
# 主从切换脚本

OLD_MASTER="192.168.1.100"
NEW_MASTER="192.168.1.101"
SLAVES=("192.168.1.102" "192.168.1.103")

echo "开始主从切换: $OLD_MASTER -> $NEW_MASTER"

# 1. 停止旧主服务器的写入
echo "步骤1: 停止旧主服务器写入"
mysql -h $OLD_MASTER -u root -p << EOF
FLUSH TABLES WITH READ LOCK;
SHOW MASTER STATUS;
EOF

# 2. 确保所有从服务器同步完成
echo "步骤2: 等待从服务器同步完成"
for slave in "${SLAVES[@]}"; do
echo "检查从服务器: $slave"
mysql -h $slave -u root -p << EOF
SHOW SLAVE STATUS\G
EOF
done

# 等待用户确认
read -p "确认所有从服务器已同步完成? (y/N): " confirm
if [[ $confirm != [yY] ]]; then
echo "切换已取消"
exit 1
fi

# 3. 提升新主服务器
echo "步骤3: 提升新主服务器"
mysql -h $NEW_MASTER -u root -p << EOF
STOP SLAVE;
RESET SLAVE ALL;
SET GLOBAL read_only = 0;
SET GLOBAL super_read_only = 0;
SHOW MASTER STATUS;
EOF

# 4. 重新配置其他从服务器
echo "步骤4: 重新配置从服务器"
for slave in "${SLAVES[@]}"; do
if [ "$slave" != "$NEW_MASTER" ]; then
echo "重新配置从服务器: $slave"
mysql -h $slave -u root -p << EOF
STOP SLAVE;
CHANGE MASTER TO
MASTER_HOST='$NEW_MASTER',
MASTER_USER='repl_user',
MASTER_PASSWORD='strong_password',
MASTER_AUTO_POSITION=1;
START SLAVE;
SHOW SLAVE STATUS\G
EOF
fi
done

# 5. 将旧主服务器配置为从服务器
echo "步骤5: 配置旧主服务器为从服务器"
mysql -h $OLD_MASTER -u root -p << EOF
UNLOCK TABLES;
SET GLOBAL read_only = 1;
SET GLOBAL super_read_only = 1;
CHANGE MASTER TO
MASTER_HOST='$NEW_MASTER',
MASTER_USER='repl_user',
MASTER_PASSWORD='strong_password',
MASTER_AUTO_POSITION=1;
START SLAVE;
SHOW SLAVE STATUS\G
EOF

echo "主从切换完成"
echo "新主服务器: $NEW_MASTER"
echo "请更新应用程序配置以连接到新的主服务器"

读写分离实现

1. 应用层读写分离

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import pymysql
import random
from contextlib import contextmanager

class MySQLCluster:
def __init__(self, master_config, slave_configs):
self.master_config = master_config
self.slave_configs = slave_configs

@contextmanager
def get_write_connection(self):
"""获取写连接(主服务器)"""
conn = pymysql.connect(**self.master_config)
try:
yield conn
finally:
conn.close()

@contextmanager
def get_read_connection(self):
"""获取读连接(从服务器)"""
# 随机选择一个从服务器
slave_config = random.choice(self.slave_configs)
conn = pymysql.connect(**slave_config)
try:
yield conn
finally:
conn.close()

def execute_write(self, sql, params=None):
"""执行写操作"""
with self.get_write_connection() as conn:
with conn.cursor() as cursor:
cursor.execute(sql, params)
conn.commit()
return cursor.rowcount

def execute_read(self, sql, params=None):
"""执行读操作"""
with self.get_read_connection() as conn:
with conn.cursor(pymysql.cursors.DictCursor) as cursor:
cursor.execute(sql, params)
return cursor.fetchall()

def execute_read_one(self, sql, params=None):
"""执行读操作(单行)"""
with self.get_read_connection() as conn:
with conn.cursor(pymysql.cursors.DictCursor) as cursor:
cursor.execute(sql, params)
return cursor.fetchone()

# 使用示例
master_config = {
'host': '192.168.1.100',
'user': 'app_user',
'password': 'app_pass',
'database': 'myapp',
'charset': 'utf8mb4'
}

slave_configs = [
{
'host': '192.168.1.101',
'user': 'app_user',
'password': 'app_pass',
'database': 'myapp',
'charset': 'utf8mb4'
},
{
'host': '192.168.1.102',
'user': 'app_user',
'password': 'app_pass',
'database': 'myapp',
'charset': 'utf8mb4'
}
]

cluster = MySQLCluster(master_config, slave_configs)

# 写操作
cluster.execute_write(
"INSERT INTO users (name, email) VALUES (%s, %s)",
('张三', 'zhangsan@example.com')
)

# 读操作
users = cluster.execute_read("SELECT * FROM users WHERE status = %s", ('active',))
user = cluster.execute_read_one("SELECT * FROM users WHERE id = %s", (1,))

2. 中间件读写分离

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# ProxySQL 配置示例
# /etc/proxysql.cnf

datadir="/var/lib/proxysql"

admin_variables=
{
admin_credentials="admin:admin"
mysql_ifaces="0.0.0.0:6032"
}

mysql_variables=
{
threads=4
max_connections=2048
default_query_delay=0
default_query_timeout=36000000
have_compress=true
poll_timeout=2000
interfaces="0.0.0.0:6033"
default_schema="information_schema"
stacksize=1048576
server_version="5.7.25"
connect_timeout_server=3000
monitor_username="monitor"
monitor_password="monitor"
monitor_history=600000
monitor_connect_interval=60000
monitor_ping_interval=10000
monitor_read_only_interval=1500
monitor_read_only_timeout=500
ping_interval_server_msec=120000
ping_timeout_server=500
commands_stats=true
sessions_sort=true
connect_retries_on_failure=10
}

# 服务器配置
mysql_servers =
(
{
address = "192.168.1.100"
port = 3306
hostgroup = 0 # 写组
weight = 1000
comment = "Master"
},
{
address = "192.168.1.101"
port = 3306
hostgroup = 1 # 读组
weight = 900
comment = "Slave1"
},
{
address = "192.168.1.102"
port = 3306
hostgroup = 1 # 读组
weight = 900
comment = "Slave2"
}
)

# 用户配置
mysql_users =
(
{
username = "app_user"
password = "app_pass"
default_hostgroup = 0
max_connections = 200
default_schema = "myapp"
active = 1
}
)

# 查询路由规则
mysql_query_rules =
(
{
rule_id = 1
active = 1
match_pattern = "^SELECT.*FOR UPDATE$"
destination_hostgroup = 0
apply = 1
comment = "SELECT FOR UPDATE to master"
},
{
rule_id = 2
active = 1
match_pattern = "^SELECT"
destination_hostgroup = 1
apply = 1
comment = "SELECT to slaves"
}
)

性能优化

1. 复制性能调优

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
-- 并行复制优化 (MySQL 5.7+)
SET GLOBAL slave_parallel_type = 'LOGICAL_CLOCK';
SET GLOBAL slave_parallel_workers = 8;
SET GLOBAL slave_preserve_commit_order = 1;

-- 复制缓冲区优化
SET GLOBAL relay_log_space_limit = 1073741824; -- 1GB
SET GLOBAL max_relay_log_size = 134217728; -- 128MB

-- 二进制日志优化
SET GLOBAL sync_binlog = 1;
SET GLOBAL binlog_cache_size = 1048576; -- 1MB
SET GLOBAL max_binlog_cache_size = 4294967296; -- 4GB

-- 半同步复制超时优化
SET GLOBAL rpl_semi_sync_master_timeout = 1000; -- 1秒

-- 查看复制性能统计
SELECT * FROM performance_schema.replication_applier_status_by_worker;

2. 复制延迟优化

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
#!/bin/bash
# 复制延迟优化脚本

SLAVE_HOST="192.168.1.101"

echo "优化复制延迟配置"

mysql -h $SLAVE_HOST -u root -p << EOF
-- 优化并行复制
SET GLOBAL slave_parallel_type = 'LOGICAL_CLOCK';
SET GLOBAL slave_parallel_workers = 4;
SET GLOBAL slave_preserve_commit_order = 1;

-- 优化复制缓冲区
SET GLOBAL relay_log_space_limit = 1073741824;
SET GLOBAL max_relay_log_size = 134217728;

-- 优化网络相关参数
SET GLOBAL slave_net_timeout = 60;
SET GLOBAL master_retry_count = 86400;

-- 查看当前延迟
SHOW SLAVE STATUS\G
EOF

echo "复制延迟优化完成"

最佳实践总结

1. 复制架构设计原则

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
-- 复制架构检查清单
SELECT
'服务器ID唯一性' as 检查项,
CASE WHEN COUNT(DISTINCT @@server_id) = 1 THEN '通过' ELSE '失败' END as 状态
FROM (SELECT @@server_id) t
UNION ALL
SELECT
'二进制日志启用',
CASE WHEN @@log_bin = 1 THEN '通过' ELSE '失败' END
UNION ALL
SELECT
'GTID模式',
CASE WHEN @@gtid_mode = 'ON' THEN '通过' ELSE '建议启用' END
UNION ALL
SELECT
'复制格式',
CASE WHEN @@binlog_format = 'ROW' THEN '通过' ELSE '建议使用ROW' END;

2. 监控指标

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# 关键监控指标
monitoring_metrics = {
'replication_lag': {
'metric': 'Seconds_Behind_Master',
'threshold': 300, # 5分钟
'alert_level': 'warning'
},
'io_thread_status': {
'metric': 'Slave_IO_Running',
'expected': 'Yes',
'alert_level': 'critical'
},
'sql_thread_status': {
'metric': 'Slave_SQL_Running',
'expected': 'Yes',
'alert_level': 'critical'
},
'replication_errors': {
'metric': 'Last_Error',
'expected': '',
'alert_level': 'critical'
}
}

3. 故障恢复流程

  1. 检测故障: 自动监控系统发现复制中断
  2. 评估影响: 确定数据丢失范围和业务影响
  3. 选择策略:
    • 轻微延迟:等待自动恢复
    • 复制中断:重启复制或跳过错误
    • 主服务器故障:执行主从切换
  4. 执行恢复: 按照预定流程执行恢复操作
  5. 验证结果: 确认数据一致性和复制正常
  6. 更新文档: 记录故障原因和解决方案

4. 安全配置建议

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
-- 复制用户权限最小化
CREATE USER 'repl_user'@'slave_ip' IDENTIFIED BY 'strong_password';
GRANT REPLICATION SLAVE ON *.* TO 'repl_user'@'slave_ip';

-- 启用SSL复制
CHANGE MASTER TO
MASTER_HOST='master_ip',
MASTER_USER='repl_user',
MASTER_PASSWORD='strong_password',
MASTER_SSL=1,
MASTER_SSL_CA='/path/to/ca.pem',
MASTER_SSL_CERT='/path/to/client-cert.pem',
MASTER_SSL_KEY='/path/to/client-key.pem';

-- 从服务器只读配置
SET GLOBAL read_only = 1;
SET GLOBAL super_read_only = 1;

通过合理配置和管理MySQL主从复制,可以实现数据库的高可用性、读写分离和负载均衡,为业务系统提供稳定可靠的数据服务。记住,复制环境需要持续监控和维护,定期进行故障演练以确保在真实故障发生时能够快速恢复。

本站由 提供部署服务