MySQL索引优化实战:从基础到高级调优技巧
Orion K Lv6

索引是MySQL性能优化的核心,正确使用索引可以将查询性能提升几个数量级。本文将深入探讨MySQL 5.7和8.4中的索引优化技巧,从基础概念到高级调优策略。

索引基础理论

1. 索引类型详解

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
-- 1. 主键索引(聚簇索引)
CREATE TABLE users (
id INT PRIMARY KEY AUTO_INCREMENT,
username VARCHAR(50) NOT NULL,
email VARCHAR(100) NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- 2. 唯一索引
CREATE UNIQUE INDEX idx_username ON users(username);
CREATE UNIQUE INDEX idx_email ON users(email);

-- 3. 普通索引
CREATE INDEX idx_created_at ON users(created_at);

-- 4. 复合索引
CREATE INDEX idx_username_email ON users(username, email);

-- 5. 前缀索引
CREATE INDEX idx_email_prefix ON users(email(10));

-- 6. 函数索引(MySQL 8.0+)
CREATE INDEX idx_upper_username ON users((UPPER(username)));

-- 7. 降序索引(MySQL 8.0+)
CREATE INDEX idx_created_desc ON users(created_at DESC);

2. 索引存储结构分析

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
-- 查看索引信息
SHOW INDEX FROM users;

-- 查看表的存储引擎和索引统计
SHOW TABLE STATUS LIKE 'users'\G

-- 分析索引使用情况
SELECT
TABLE_SCHEMA,
TABLE_NAME,
INDEX_NAME,
COLUMN_NAME,
SEQ_IN_INDEX,
CARDINALITY,
INDEX_TYPE
FROM information_schema.STATISTICS
WHERE TABLE_SCHEMA = 'your_database'
AND TABLE_NAME = 'users'
ORDER BY TABLE_NAME, INDEX_NAME, SEQ_IN_INDEX;

索引设计策略

1. 复合索引设计原则

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
-- 创建测试表
CREATE TABLE orders (
id INT PRIMARY KEY AUTO_INCREMENT,
user_id INT NOT NULL,
status VARCHAR(20) NOT NULL,
order_date DATE NOT NULL,
amount DECIMAL(10,2) NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- 插入测试数据
INSERT INTO orders (user_id, status, order_date, amount) VALUES
(1, 'pending', '2023-09-01', 100.00),
(2, 'completed', '2023-09-02', 200.00),
(1, 'completed', '2023-09-03', 150.00),
(3, 'pending', '2023-09-04', 300.00),
(2, 'cancelled', '2023-09-05', 250.00);

-- 错误的索引设计
CREATE INDEX idx_bad_1 ON orders(status);
CREATE INDEX idx_bad_2 ON orders(user_id);
CREATE INDEX idx_bad_3 ON orders(order_date);

-- 正确的复合索引设计(遵循最左前缀原则)
-- 根据查询频率和选择性设计
CREATE INDEX idx_user_status_date ON orders(user_id, status, order_date);

-- 测试查询性能
-- 这些查询都能使用上面的复合索引
EXPLAIN SELECT * FROM orders WHERE user_id = 1;
EXPLAIN SELECT * FROM orders WHERE user_id = 1 AND status = 'completed';
EXPLAIN SELECT * FROM orders WHERE user_id = 1 AND status = 'completed' AND order_date = '2023-09-03';

-- 这个查询无法使用索引(跳过了最左列)
EXPLAIN SELECT * FROM orders WHERE status = 'completed' AND order_date = '2023-09-03';

2. 索引选择性分析

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
-- 分析列的选择性(不重复值的比例)
SELECT
'user_id' as column_name,
COUNT(DISTINCT user_id) as distinct_values,
COUNT(*) as total_rows,
COUNT(DISTINCT user_id) / COUNT(*) as selectivity
FROM orders
UNION ALL
SELECT
'status',
COUNT(DISTINCT status),
COUNT(*),
COUNT(DISTINCT status) / COUNT(*)
FROM orders
UNION ALL
SELECT
'order_date',
COUNT(DISTINCT order_date),
COUNT(*),
COUNT(DISTINCT order_date) / COUNT(*)
FROM orders;

-- 基于选择性优化索引顺序
-- 高选择性的列放在前面
CREATE INDEX idx_optimized ON orders(order_date, user_id, status);

查询优化技巧

1. EXPLAIN 分析详解

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
-- 详细的 EXPLAIN 分析
EXPLAIN FORMAT=JSON
SELECT o.*, u.username
FROM orders o
JOIN users u ON o.user_id = u.id
WHERE o.status = 'completed'
AND o.order_date BETWEEN '2023-09-01' AND '2023-09-30'
ORDER BY o.created_at DESC
LIMIT 10;

-- 分析关键指标
-- 1. type: 连接类型(const > eq_ref > ref > range > index > ALL)
-- 2. key: 使用的索引
-- 3. rows: 扫描的行数
-- 4. Extra: 额外信息

-- 优化前后对比
-- 优化前:全表扫描
SELECT * FROM orders WHERE YEAR(order_date) = 2023;

-- 优化后:使用索引范围扫描
SELECT * FROM orders WHERE order_date >= '2023-01-01' AND order_date < '2024-01-01';

2. 索引覆盖查询优化

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
-- 创建覆盖索引
CREATE INDEX idx_covering ON orders(user_id, status, order_date, amount);

-- 覆盖索引查询(不需要回表)
EXPLAIN SELECT user_id, status, order_date, amount
FROM orders
WHERE user_id = 1 AND status = 'completed';

-- 非覆盖索引查询(需要回表)
EXPLAIN SELECT *
FROM orders
WHERE user_id = 1 AND status = 'completed';

-- 优化技巧:只查询需要的列
-- 不好的做法
SELECT * FROM orders WHERE user_id = 1;

-- 好的做法
SELECT id, status, amount FROM orders WHERE user_id = 1;

高级索引优化

1. 分区表索引优化

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
-- 创建分区表
CREATE TABLE orders_partitioned (
id INT AUTO_INCREMENT,
user_id INT NOT NULL,
status VARCHAR(20) NOT NULL,
order_date DATE NOT NULL,
amount DECIMAL(10,2) NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (id, order_date)
) PARTITION BY RANGE (YEAR(order_date)) (
PARTITION p2022 VALUES LESS THAN (2023),
PARTITION p2023 VALUES LESS THAN (2024),
PARTITION p2024 VALUES LESS THAN (2025),
PARTITION p_future VALUES LESS THAN MAXVALUE
);

-- 分区表索引策略
-- 1. 分区键必须包含在主键中
-- 2. 每个分区都有独立的索引
CREATE INDEX idx_user_status ON orders_partitioned(user_id, status);

-- 查询分区信息
SELECT
PARTITION_NAME,
TABLE_ROWS,
DATA_LENGTH,
INDEX_LENGTH
FROM information_schema.PARTITIONS
WHERE TABLE_SCHEMA = 'your_database'
AND TABLE_NAME = 'orders_partitioned';

2. MySQL 8.4 新特性

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
-- 1. 不可见索引(MySQL 8.0+)
CREATE INDEX idx_invisible ON orders(amount) INVISIBLE;

-- 测试索引效果而不删除索引
ALTER INDEX idx_invisible ON orders INVISIBLE;
ALTER INDEX idx_invisible ON orders VISIBLE;

-- 2. 函数索引优化
CREATE INDEX idx_month_year ON orders((YEAR(order_date)), (MONTH(order_date)));

-- 使用函数索引
EXPLAIN SELECT * FROM orders
WHERE YEAR(order_date) = 2023 AND MONTH(order_date) = 9;

-- 3. 多值索引(MySQL 8.0.17+)
CREATE TABLE products (
id INT PRIMARY KEY,
name VARCHAR(100),
tags JSON,
INDEX idx_tags ((CAST(tags->'$[*]' AS CHAR(50) ARRAY)))
);

-- 4. 直方图统计(MySQL 8.0+)
ANALYZE TABLE orders UPDATE HISTOGRAM ON user_id, status WITH 100 BUCKETS;

-- 查看直方图信息
SELECT * FROM information_schema.COLUMN_STATISTICS
WHERE SCHEMA_NAME = 'your_database' AND TABLE_NAME = 'orders';

索引监控和维护

1. 索引使用情况监控

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
-- 查看索引使用统计
SELECT
OBJECT_SCHEMA,
OBJECT_NAME,
INDEX_NAME,
COUNT_FETCH,
COUNT_INSERT,
COUNT_UPDATE,
COUNT_DELETE
FROM performance_schema.table_io_waits_summary_by_index_usage
WHERE OBJECT_SCHEMA = 'your_database'
ORDER BY COUNT_FETCH DESC;

-- 查找未使用的索引
SELECT
t.TABLE_SCHEMA,
t.TABLE_NAME,
t.INDEX_NAME
FROM information_schema.STATISTICS t
LEFT JOIN performance_schema.table_io_waits_summary_by_index_usage p
ON t.TABLE_SCHEMA = p.OBJECT_SCHEMA
AND t.TABLE_NAME = p.OBJECT_NAME
AND t.INDEX_NAME = p.INDEX_NAME
WHERE t.TABLE_SCHEMA = 'your_database'
AND p.INDEX_NAME IS NULL
AND t.INDEX_NAME != 'PRIMARY'
GROUP BY t.TABLE_SCHEMA, t.TABLE_NAME, t.INDEX_NAME;

-- 查看索引碎片情况
SELECT
TABLE_SCHEMA,
TABLE_NAME,
DATA_LENGTH,
INDEX_LENGTH,
DATA_FREE,
(DATA_FREE / (DATA_LENGTH + INDEX_LENGTH)) * 100 as fragmentation_percent
FROM information_schema.TABLES
WHERE TABLE_SCHEMA = 'your_database'
AND DATA_FREE > 0
ORDER BY fragmentation_percent DESC;

2. 索引维护脚本

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#!/bin/bash
# MySQL 索引维护脚本

MYSQL_USER="your_user"
MYSQL_PASS="your_pass"
MYSQL_HOST="localhost"
MYSQL_DB="your_database"

# 分析表和索引
analyze_tables() {
echo "开始分析表和索引..."
mysql -u${MYSQL_USER} -p${MYSQL_PASS} -h${MYSQL_HOST} ${MYSQL_DB} -e "
SELECT CONCAT('ANALYZE TABLE ', TABLE_SCHEMA, '.', TABLE_NAME, ';') as analyze_stmt
FROM information_schema.TABLES
WHERE TABLE_SCHEMA = '${MYSQL_DB}'
AND TABLE_TYPE = 'BASE TABLE';" | grep ANALYZE | mysql -u${MYSQL_USER} -p${MYSQL_PASS} -h${MYSQL_HOST} ${MYSQL_DB}
}

# 优化表
optimize_tables() {
echo "开始优化表..."
mysql -u${MYSQL_USER} -p${MYSQL_PASS} -h${MYSQL_HOST} ${MYSQL_DB} -e "
SELECT CONCAT('OPTIMIZE TABLE ', TABLE_SCHEMA, '.', TABLE_NAME, ';') as optimize_stmt
FROM information_schema.TABLES
WHERE TABLE_SCHEMA = '${MYSQL_DB}'
AND TABLE_TYPE = 'BASE TABLE'
AND DATA_FREE > 1024*1024;" | grep OPTIMIZE | mysql -u${MYSQL_USER} -p${MYSQL_PASS} -h${MYSQL_HOST} ${MYSQL_DB}
}

# 检查重复索引
check_duplicate_indexes() {
echo "检查重复索引..."
mysql -u${MYSQL_USER} -p${MYSQL_PASS} -h${MYSQL_HOST} ${MYSQL_DB} -e "
SELECT
TABLE_NAME,
GROUP_CONCAT(INDEX_NAME) as duplicate_indexes,
GROUP_CONCAT(COLUMN_NAME ORDER BY SEQ_IN_INDEX) as columns
FROM information_schema.STATISTICS
WHERE TABLE_SCHEMA = '${MYSQL_DB}'
GROUP BY TABLE_NAME, GROUP_CONCAT(COLUMN_NAME ORDER BY SEQ_IN_INDEX)
HAVING COUNT(*) > 1;"
}

# 主函数
main() {
echo "=== MySQL 索引维护开始 $(date) ==="
analyze_tables
optimize_tables
check_duplicate_indexes
echo "=== MySQL 索引维护完成 $(date) ==="
}

main

实战案例分析

1. 慢查询优化案例

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
-- 原始慢查询
SELECT
u.username,
COUNT(o.id) as order_count,
SUM(o.amount) as total_amount
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
WHERE u.created_at >= '2023-01-01'
AND o.status = 'completed'
GROUP BY u.id, u.username
HAVING total_amount > 1000
ORDER BY total_amount DESC
LIMIT 20;

-- 分析查询执行计划
EXPLAIN FORMAT=JSON [上述查询];

-- 优化步骤1:创建合适的索引
CREATE INDEX idx_users_created ON users(created_at);
CREATE INDEX idx_orders_status_user ON orders(status, user_id, amount);

-- 优化步骤2:重写查询
SELECT
u.username,
o.order_count,
o.total_amount
FROM users u
JOIN (
SELECT
user_id,
COUNT(*) as order_count,
SUM(amount) as total_amount
FROM orders
WHERE status = 'completed'
GROUP BY user_id
HAVING total_amount > 1000
) o ON u.id = o.user_id
WHERE u.created_at >= '2023-01-01'
ORDER BY o.total_amount DESC
LIMIT 20;

2. 大表索引优化

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
-- 大表索引创建策略
-- 1. 在线添加索引(MySQL 5.7+)
ALTER TABLE large_table ADD INDEX idx_new_column(column_name), ALGORITHM=INPLACE, LOCK=NONE;

-- 2. 分批创建索引(避免锁表)
-- 使用 pt-online-schema-change 工具
-- pt-online-schema-change --alter "ADD INDEX idx_new(column_name)" D=database,t=large_table --execute

-- 3. 监控索引创建进度
SELECT
PROCESSLIST_ID,
PROCESSLIST_STATE,
PROCESSLIST_INFO,
PROCESSLIST_TIME
FROM performance_schema.threads
WHERE PROCESSLIST_INFO LIKE '%ADD INDEX%';

性能测试和基准

1. 索引性能测试

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
-- 创建测试数据
DELIMITER //
CREATE PROCEDURE generate_test_data(IN num_rows INT)
BEGIN
DECLARE i INT DEFAULT 1;
DECLARE batch_size INT DEFAULT 1000;

WHILE i <= num_rows DO
INSERT INTO orders (user_id, status, order_date, amount)
SELECT
FLOOR(1 + RAND() * 1000) as user_id,
ELT(FLOOR(1 + RAND() * 4), 'pending', 'completed', 'cancelled', 'shipped') as status,
DATE_ADD('2023-01-01', INTERVAL FLOOR(RAND() * 365) DAY) as order_date,
ROUND(RAND() * 1000, 2) as amount
FROM
(SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5) t1,
(SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5) t2,
(SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5) t3,
(SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5) t4
LIMIT batch_size;

SET i = i + batch_size;
END WHILE;
END //
DELIMITER ;

-- 生成测试数据
CALL generate_test_data(100000);

-- 性能测试脚本
-- 测试无索引查询
SET @start_time = NOW(6);
SELECT COUNT(*) FROM orders WHERE user_id = 500 AND status = 'completed';
SET @end_time = NOW(6);
SELECT TIMESTAMPDIFF(MICROSECOND, @start_time, @end_time) as execution_time_microseconds;

-- 创建索引后测试
CREATE INDEX idx_user_status ON orders(user_id, status);

SET @start_time = NOW(6);
SELECT COUNT(*) FROM orders WHERE user_id = 500 AND status = 'completed';
SET @end_time = NOW(6);
SELECT TIMESTAMPDIFF(MICROSECOND, @start_time, @end_time) as execution_time_microseconds;

2. 基准测试工具

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#!/bin/bash
# 使用 sysbench 进行索引性能测试

# 准备测试数据
sysbench oltp_read_write \
--mysql-host=localhost \
--mysql-port=3306 \
--mysql-user=test_user \
--mysql-password=test_pass \
--mysql-db=test_db \
--tables=1 \
--table-size=100000 \
prepare

# 测试读性能
sysbench oltp_read_only \
--mysql-host=localhost \
--mysql-port=3306 \
--mysql-user=test_user \
--mysql-password=test_pass \
--mysql-db=test_db \
--tables=1 \
--table-size=100000 \
--threads=16 \
--time=60 \
--report-interval=10 \
run

# 清理测试数据
sysbench oltp_read_write \
--mysql-host=localhost \
--mysql-port=3306 \
--mysql-user=test_user \
--mysql-password=test_pass \
--mysql-db=test_db \
--tables=1 \
cleanup

最佳实践总结

1. 索引设计原则

  1. 选择性原则: 高选择性的列适合建索引
  2. 最左前缀原则: 复合索引遵循最左前缀匹配
  3. 覆盖索引原则: 尽量使用覆盖索引避免回表
  4. 适度原则: 不要过度创建索引,影响写性能

2. 索引维护策略

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
-- 定期维护索引的SQL脚本
-- 1. 分析表统计信息
ANALYZE TABLE your_table;

-- 2. 优化表碎片
OPTIMIZE TABLE your_table;

-- 3. 检查索引使用情况
SELECT
OBJECT_NAME,
INDEX_NAME,
COUNT_FETCH,
COUNT_INSERT,
COUNT_UPDATE,
COUNT_DELETE
FROM performance_schema.table_io_waits_summary_by_index_usage
WHERE OBJECT_SCHEMA = 'your_database'
ORDER BY COUNT_FETCH DESC;

-- 4. 删除未使用的索引
-- DROP INDEX unused_index_name ON table_name;

3. 监控告警

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# Python 索引监控脚本
import pymysql
import json
from datetime import datetime

class IndexMonitor:
def __init__(self, db_config):
self.db_config = db_config

def get_connection(self):
return pymysql.connect(**self.db_config)

def check_index_usage(self):
"""检查索引使用情况"""
conn = self.get_connection()
try:
with conn.cursor(pymysql.cursors.DictCursor) as cursor:
sql = """
SELECT
OBJECT_NAME as table_name,
INDEX_NAME as index_name,
COUNT_FETCH as fetch_count,
COUNT_INSERT as insert_count,
COUNT_UPDATE as update_count,
COUNT_DELETE as delete_count
FROM performance_schema.table_io_waits_summary_by_index_usage
WHERE OBJECT_SCHEMA = %s
AND COUNT_FETCH = 0
AND INDEX_NAME != 'PRIMARY'
ORDER BY OBJECT_NAME, INDEX_NAME
"""
cursor.execute(sql, (self.db_config['database'],))
return cursor.fetchall()
finally:
conn.close()

def check_table_fragmentation(self):
"""检查表碎片情况"""
conn = self.get_connection()
try:
with conn.cursor(pymysql.cursors.DictCursor) as cursor:
sql = """
SELECT
TABLE_NAME,
DATA_LENGTH,
INDEX_LENGTH,
DATA_FREE,
ROUND((DATA_FREE / (DATA_LENGTH + INDEX_LENGTH)) * 100, 2) as fragmentation_percent
FROM information_schema.TABLES
WHERE TABLE_SCHEMA = %s
AND DATA_FREE > 0
AND (DATA_LENGTH + INDEX_LENGTH) > 0
ORDER BY fragmentation_percent DESC
"""
cursor.execute(sql, (self.db_config['database'],))
return cursor.fetchall()
finally:
conn.close()

def generate_report(self):
"""生成监控报告"""
unused_indexes = self.check_index_usage()
fragmented_tables = self.check_table_fragmentation()

report = {
'timestamp': datetime.now().isoformat(),
'unused_indexes': unused_indexes,
'fragmented_tables': fragmented_tables,
'recommendations': []
}

# 生成建议
if unused_indexes:
report['recommendations'].append("发现未使用的索引,建议考虑删除")

if fragmented_tables:
high_frag_tables = [t for t in fragmented_tables if t['fragmentation_percent'] > 10]
if high_frag_tables:
report['recommendations'].append("发现高碎片表,建议执行 OPTIMIZE TABLE")

return report

# 使用示例
if __name__ == "__main__":
db_config = {
'host': 'localhost',
'user': 'monitor_user',
'password': 'monitor_pass',
'database': 'your_database',
'charset': 'utf8mb4'
}

monitor = IndexMonitor(db_config)
report = monitor.generate_report()

print(json.dumps(report, indent=2, ensure_ascii=False))

通过以上索引优化技巧和监控策略,可以显著提升MySQL数据库的查询性能,确保数据库在高并发场景下的稳定运行。记住,索引优化是一个持续的过程,需要根据业务变化和数据增长不断调整优化策略。

本站由 提供部署服务