MySQL查询优化深度指南:SQL性能调优实战技巧
Orion K Lv6

查询优化是MySQL性能调优的核心环节,一个优化良好的查询可以将执行时间从几秒缩短到几毫秒。本文将深入探讨MySQL查询优化的各种技巧和最佳实践。

查询分析基础

1. EXPLAIN 执行计划详解

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
-- 创建测试表
CREATE TABLE employees (
id INT PRIMARY KEY AUTO_INCREMENT,
name VARCHAR(100) NOT NULL,
department_id INT NOT NULL,
salary DECIMAL(10,2) NOT NULL,
hire_date DATE NOT NULL,
email VARCHAR(100) UNIQUE,
INDEX idx_dept_salary (department_id, salary),
INDEX idx_hire_date (hire_date)
);

CREATE TABLE departments (
id INT PRIMARY KEY AUTO_INCREMENT,
name VARCHAR(100) NOT NULL,
manager_id INT,
budget DECIMAL(12,2)
);

-- 插入测试数据
INSERT INTO departments (name, manager_id, budget) VALUES
('Engineering', 1, 1000000.00),
('Marketing', 2, 500000.00),
('Sales', 3, 750000.00),
('HR', 4, 300000.00);

-- 基础 EXPLAIN 分析
EXPLAIN SELECT * FROM employees WHERE department_id = 1;

-- 详细的 JSON 格式分析
EXPLAIN FORMAT=JSON
SELECT e.name, e.salary, d.name as dept_name
FROM employees e
JOIN departments d ON e.department_id = d.id
WHERE e.salary > 50000
ORDER BY e.salary DESC
LIMIT 10;

-- EXPLAIN 关键字段解释
-- id: 查询序列号
-- select_type: 查询类型 (SIMPLE, PRIMARY, SUBQUERY, DERIVED等)
-- table: 表名
-- type: 连接类型 (system > const > eq_ref > ref > range > index > ALL)
-- possible_keys: 可能使用的索引
-- key: 实际使用的索引
-- key_len: 索引长度
-- ref: 索引引用
-- rows: 扫描行数估计
-- Extra: 额外信息

2. 查询性能分析工具

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
-- 开启查询分析
SET profiling = 1;

-- 执行查询
SELECT e.name, e.salary, d.name as dept_name
FROM employees e
JOIN departments d ON e.department_id = d.id
WHERE e.salary BETWEEN 40000 AND 80000
ORDER BY e.salary DESC;

-- 查看查询性能分析
SHOW PROFILES;

-- 查看详细的性能分析
SHOW PROFILE FOR QUERY 1;

-- 查看特定资源的使用情况
SHOW PROFILE CPU, MEMORY FOR QUERY 1;

-- MySQL 8.0+ 使用 Performance Schema
SELECT
EVENT_NAME,
COUNT_STAR,
SUM_TIMER_WAIT/1000000000 as SUM_TIMER_WAIT_SEC,
AVG_TIMER_WAIT/1000000000 as AVG_TIMER_WAIT_SEC
FROM performance_schema.events_statements_summary_by_digest
WHERE DIGEST_TEXT LIKE '%employees%'
ORDER BY SUM_TIMER_WAIT DESC
LIMIT 10;

SQL 重写优化技巧

1. 子查询优化

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
-- 低效的子查询
SELECT * FROM employees
WHERE department_id IN (
SELECT id FROM departments WHERE budget > 500000
);

-- 优化为 JOIN
SELECT e.* FROM employees e
JOIN departments d ON e.department_id = d.id
WHERE d.budget > 500000;

-- EXISTS vs IN 优化
-- 当子查询结果集较小时,使用 IN
SELECT * FROM employees
WHERE department_id IN (1, 2, 3);

-- 当子查询结果集较大时,使用 EXISTS
SELECT * FROM employees e
WHERE EXISTS (
SELECT 1 FROM departments d
WHERE d.id = e.department_id AND d.budget > 500000
);

-- 相关子查询优化
-- 低效的相关子查询
SELECT e1.name, e1.salary
FROM employees e1
WHERE e1.salary > (
SELECT AVG(e2.salary)
FROM employees e2
WHERE e2.department_id = e1.department_id
);

-- 优化为窗口函数 (MySQL 8.0+)
SELECT name, salary
FROM (
SELECT
name,
salary,
department_id,
AVG(salary) OVER (PARTITION BY department_id) as avg_dept_salary
FROM employees
) t
WHERE salary > avg_dept_salary;

2. JOIN 优化技巧

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
-- 创建更多测试表
CREATE TABLE projects (
id INT PRIMARY KEY AUTO_INCREMENT,
name VARCHAR(100) NOT NULL,
department_id INT NOT NULL,
start_date DATE,
end_date DATE,
INDEX idx_dept_dates (department_id, start_date, end_date)
);

CREATE TABLE employee_projects (
employee_id INT,
project_id INT,
role VARCHAR(50),
PRIMARY KEY (employee_id, project_id),
INDEX idx_project (project_id)
);

-- JOIN 顺序优化
-- MySQL 会自动优化 JOIN 顺序,但了解原理很重要

-- 小表驱动大表
EXPLAIN SELECT /*+ STRAIGHT_JOIN */
d.name as dept_name,
COUNT(e.id) as employee_count
FROM departments d -- 小表
JOIN employees e ON d.id = e.department_id -- 大表
GROUP BY d.id, d.name;

-- 多表 JOIN 优化
SELECT
e.name as employee_name,
d.name as dept_name,
p.name as project_name,
ep.role
FROM employees e
JOIN departments d ON e.department_id = d.id
JOIN employee_projects ep ON e.id = ep.employee_id
JOIN projects p ON ep.project_id = p.id
WHERE d.budget > 500000
AND p.start_date >= '2023-01-01';

-- 使用索引提示优化
SELECT /*+ USE_INDEX(e, idx_dept_salary) */
e.name, e.salary
FROM employees e
WHERE e.department_id = 1 AND e.salary > 50000;

3. 分页查询优化

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
-- 传统分页的问题
SELECT * FROM employees
ORDER BY id
LIMIT 10000, 20; -- 需要扫描前10000行

-- 优化方案1: 使用索引覆盖
SELECT e.* FROM employees e
JOIN (
SELECT id FROM employees
ORDER BY id
LIMIT 10000, 20
) t ON e.id = t.id;

-- 优化方案2: 游标分页
SELECT * FROM employees
WHERE id > 10000 -- 上一页的最后一个ID
ORDER BY id
LIMIT 20;

-- 优化方案3: 延迟关联
SELECT e.* FROM employees e
JOIN (
SELECT id FROM employees
WHERE department_id = 1
ORDER BY salary DESC
LIMIT 1000, 20
) t ON e.id = t.id;

-- 复杂条件的分页优化
-- 创建复合索引支持排序和过滤
CREATE INDEX idx_dept_salary_id ON employees(department_id, salary DESC, id);

SELECT * FROM employees
WHERE department_id = 1
AND (salary < 60000 OR (salary = 60000 AND id > 1000))
ORDER BY salary DESC, id
LIMIT 20;

高级优化技巧

1. 窗口函数优化 (MySQL 8.0+)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
-- 排名查询优化
-- 传统方式:使用子查询
SELECT
name,
salary,
(SELECT COUNT(*) FROM employees e2
WHERE e2.department_id = e1.department_id
AND e2.salary >= e1.salary) as salary_rank
FROM employees e1
WHERE department_id = 1;

-- 优化:使用窗口函数
SELECT
name,
salary,
RANK() OVER (PARTITION BY department_id ORDER BY salary DESC) as salary_rank
FROM employees
WHERE department_id = 1;

-- 移动平均计算
SELECT
name,
salary,
hire_date,
AVG(salary) OVER (
PARTITION BY department_id
ORDER BY hire_date
ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
) as moving_avg_salary
FROM employees
ORDER BY department_id, hire_date;

-- 累计统计
SELECT
name,
salary,
SUM(salary) OVER (
PARTITION BY department_id
ORDER BY salary
ROWS UNBOUNDED PRECEDING
) as cumulative_salary
FROM employees
ORDER BY department_id, salary;

2. 条件优化技巧

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
-- 避免在 WHERE 子句中使用函数
-- 低效查询
SELECT * FROM employees
WHERE YEAR(hire_date) = 2023;

-- 优化后
SELECT * FROM employees
WHERE hire_date >= '2023-01-01'
AND hire_date < '2024-01-01';

-- 避免隐式类型转换
-- 低效查询(如果 department_id 是 INT 类型)
SELECT * FROM employees WHERE department_id = '1';

-- 优化后
SELECT * FROM employees WHERE department_id = 1;

-- OR 条件优化
-- 低效查询
SELECT * FROM employees
WHERE department_id = 1 OR department_id = 2;

-- 优化为 IN
SELECT * FROM employees
WHERE department_id IN (1, 2);

-- 复杂 OR 条件优化为 UNION
SELECT * FROM employees WHERE department_id = 1
UNION ALL
SELECT * FROM employees WHERE salary > 80000;

-- LIKE 查询优化
-- 低效查询
SELECT * FROM employees WHERE name LIKE '%john%';

-- 如果只需要前缀匹配,使用前缀索引
SELECT * FROM employees WHERE name LIKE 'john%';

-- 全文搜索优化
ALTER TABLE employees ADD FULLTEXT(name);
SELECT * FROM employees WHERE MATCH(name) AGAINST('john' IN NATURAL LANGUAGE MODE);

3. 聚合查询优化

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
-- COUNT 优化
-- 低效查询
SELECT COUNT(*) FROM employees WHERE department_id = 1;

-- 如果不需要精确计数,可以使用估算
SELECT TABLE_ROWS FROM information_schema.TABLES
WHERE TABLE_SCHEMA = 'your_database' AND TABLE_NAME = 'employees';

-- GROUP BY 优化
-- 创建支持 GROUP BY 的索引
CREATE INDEX idx_dept_hire_date ON employees(department_id, hire_date);

-- 优化的聚合查询
SELECT
department_id,
YEAR(hire_date) as hire_year,
COUNT(*) as employee_count,
AVG(salary) as avg_salary
FROM employees
GROUP BY department_id, YEAR(hire_date)
ORDER BY department_id, hire_year;

-- HAVING 子句优化
-- 将能在 WHERE 中过滤的条件移到 WHERE
-- 低效查询
SELECT department_id, COUNT(*) as cnt
FROM employees
GROUP BY department_id
HAVING department_id IN (1, 2, 3) AND cnt > 5;

-- 优化后
SELECT department_id, COUNT(*) as cnt
FROM employees
WHERE department_id IN (1, 2, 3)
GROUP BY department_id
HAVING cnt > 5;

查询缓存和优化器

1. 查询缓存配置 (MySQL 5.7)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
-- 查看查询缓存状态
SHOW VARIABLES LIKE 'query_cache%';
SHOW STATUS LIKE 'Qcache%';

-- 配置查询缓存 (my.cnf)
-- query_cache_type = 1
-- query_cache_size = 128M
-- query_cache_limit = 2M

-- 查询缓存使用技巧
-- 使用 SQL_CACHE 提示
SELECT SQL_CACHE * FROM employees WHERE department_id = 1;

-- 使用 SQL_NO_CACHE 跳过缓存
SELECT SQL_NO_CACHE * FROM employees WHERE id = 1;

-- 注意:MySQL 8.0 已移除查询缓存,推荐使用应用层缓存

2. 优化器提示 (MySQL 8.0+)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
-- 强制使用特定索引
SELECT /*+ USE_INDEX(employees idx_dept_salary) */
* FROM employees
WHERE department_id = 1 AND salary > 50000;

-- 强制使用特定 JOIN 算法
SELECT /*+ HASH_JOIN(e, d) */
e.name, d.name
FROM employees e
JOIN departments d ON e.department_id = d.id;

-- 控制 JOIN 顺序
SELECT /*+ STRAIGHT_JOIN */
e.name, d.name
FROM departments d
JOIN employees e ON d.id = e.department_id
WHERE d.budget > 500000;

-- 并行查询提示
SELECT /*+ SET_VAR(optimizer_switch = 'block_nested_loop=off') */
* FROM employees e1
JOIN employees e2 ON e1.department_id = e2.department_id
WHERE e1.id != e2.id;

性能监控和诊断

1. 慢查询日志分析

1
2
3
4
5
6
7
8
9
10
11
# 启用慢查询日志 (my.cnf)
# slow_query_log = 1
# slow_query_log_file = /var/log/mysql/slow.log
# long_query_time = 1
# log_queries_not_using_indexes = 1

# 分析慢查询日志
mysqldumpslow -s t -t 10 /var/log/mysql/slow.log

# 使用 pt-query-digest 分析
pt-query-digest /var/log/mysql/slow.log > slow_query_report.txt

2. Performance Schema 监控

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
-- 启用 Performance Schema 监控
UPDATE performance_schema.setup_instruments
SET ENABLED = 'YES', TIMED = 'YES'
WHERE NAME LIKE 'statement/%';

UPDATE performance_schema.setup_consumers
SET ENABLED = 'YES'
WHERE NAME LIKE '%statements%';

-- 查看最耗时的查询
SELECT
DIGEST_TEXT,
COUNT_STAR,
SUM_TIMER_WAIT/1000000000 as SUM_TIMER_WAIT_SEC,
AVG_TIMER_WAIT/1000000000 as AVG_TIMER_WAIT_SEC,
SUM_ROWS_EXAMINED,
SUM_ROWS_SENT
FROM performance_schema.events_statements_summary_by_digest
ORDER BY SUM_TIMER_WAIT DESC
LIMIT 10;

-- 查看表的访问统计
SELECT
OBJECT_SCHEMA,
OBJECT_NAME,
COUNT_READ,
COUNT_WRITE,
COUNT_FETCH,
COUNT_INSERT,
COUNT_UPDATE,
COUNT_DELETE
FROM performance_schema.table_io_waits_summary_by_table
WHERE OBJECT_SCHEMA = 'your_database'
ORDER BY COUNT_READ + COUNT_WRITE DESC;

3. 实时查询监控脚本

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#!/usr/bin/env python3
import pymysql
import time
import json
from datetime import datetime

class QueryMonitor:
def __init__(self, db_config):
self.db_config = db_config
self.connection = None

def connect(self):
self.connection = pymysql.connect(**self.db_config)

def get_running_queries(self):
"""获取当前运行的查询"""
with self.connection.cursor(pymysql.cursors.DictCursor) as cursor:
cursor.execute("""
SELECT
ID,
USER,
HOST,
DB,
COMMAND,
TIME,
STATE,
INFO
FROM information_schema.PROCESSLIST
WHERE COMMAND != 'Sleep'
AND TIME > 1
ORDER BY TIME DESC
""")
return cursor.fetchall()

def get_slow_queries(self, limit=10):
"""获取最慢的查询"""
with self.connection.cursor(pymysql.cursors.DictCursor) as cursor:
cursor.execute("""
SELECT
DIGEST_TEXT,
COUNT_STAR as execution_count,
ROUND(SUM_TIMER_WAIT/1000000000, 2) as total_time_sec,
ROUND(AVG_TIMER_WAIT/1000000000, 2) as avg_time_sec,
ROUND(MAX_TIMER_WAIT/1000000000, 2) as max_time_sec,
SUM_ROWS_EXAMINED,
SUM_ROWS_SENT
FROM performance_schema.events_statements_summary_by_digest
WHERE DIGEST_TEXT IS NOT NULL
ORDER BY SUM_TIMER_WAIT DESC
LIMIT %s
""", (limit,))
return cursor.fetchall()

def kill_long_running_query(self, process_id):
"""终止长时间运行的查询"""
with self.connection.cursor() as cursor:
cursor.execute(f"KILL {process_id}")
print(f"已终止查询进程: {process_id}")

def monitor_loop(self, interval=30):
"""监控循环"""
while True:
try:
print(f"\n=== 查询监控报告 {datetime.now()} ===")

# 检查运行中的查询
running_queries = self.get_running_queries()
if running_queries:
print(f"发现 {len(running_queries)} 个运行中的查询:")
for query in running_queries:
print(f" ID: {query['ID']}, 用户: {query['USER']}, "
f"运行时间: {query['TIME']}秒")
if query['TIME'] > 300: # 超过5分钟的查询
print(f" 警告: 长时间运行的查询!")
# 可以选择自动终止
# self.kill_long_running_query(query['ID'])

# 检查慢查询
slow_queries = self.get_slow_queries(5)
if slow_queries:
print(f"\n最慢的 {len(slow_queries)} 个查询:")
for i, query in enumerate(slow_queries, 1):
print(f" {i}. 平均耗时: {query['avg_time_sec']}秒, "
f"执行次数: {query['execution_count']}")
print(f" 查询: {query['DIGEST_TEXT'][:100]}...")

time.sleep(interval)

except KeyboardInterrupt:
print("\n监控已停止")
break
except Exception as e:
print(f"监控错误: {e}")
time.sleep(interval)

# 使用示例
if __name__ == "__main__":
db_config = {
'host': 'localhost',
'user': 'monitor_user',
'password': 'monitor_pass',
'database': 'performance_schema',
'charset': 'utf8mb4'
}

monitor = QueryMonitor(db_config)
monitor.connect()
monitor.monitor_loop(30) # 每30秒检查一次

最佳实践总结

1. 查询优化检查清单

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
-- 1. 检查是否使用了索引
EXPLAIN SELECT * FROM your_table WHERE your_condition;

-- 2. 检查索引选择性
SELECT
COUNT(DISTINCT column_name) / COUNT(*) as selectivity
FROM your_table;

-- 3. 检查查询是否可以使用覆盖索引
-- 确保 SELECT 的列都包含在索引中

-- 4. 避免 SELECT *,只查询需要的列
SELECT specific_columns FROM your_table WHERE condition;

-- 5. 合理使用 LIMIT
SELECT * FROM your_table WHERE condition LIMIT 100;

-- 6. 优化 JOIN 条件
-- 确保 JOIN 条件上有索引

-- 7. 避免在 WHERE 子句中使用函数
-- 使用范围查询替代函数调用

2. 性能优化原则

  1. 索引优先: 确保查询条件有合适的索引支持
  2. 减少数据量: 只查询需要的列和行
  3. 避免复杂子查询: 尽量使用 JOIN 替代子查询
  4. 合理使用缓存: 应用层缓存热点数据
  5. 监控和分析: 定期分析慢查询和性能指标
  6. 测试验证: 在生产环境前充分测试优化效果

3. 常见优化误区

1
2
3
4
5
6
7
8
9
10
11
12
13
14
-- 误区1: 过度使用索引
-- 不要为每个列都创建索引,会影响写性能

-- 误区2: 忽略数据类型
-- 使用合适的数据类型,避免不必要的存储开销

-- 误区3: 不考虑数据分布
-- 对于数据分布不均匀的列,索引效果可能不佳

-- 误区4: 盲目使用 LIMIT
-- 大偏移量的 LIMIT 仍然需要扫描前面的行

-- 误区5: 忽略查询缓存失效
-- 频繁更新的表不适合查询缓存

通过系统性地应用这些查询优化技巧,可以显著提升MySQL数据库的查询性能,为应用提供更好的响应速度和用户体验。记住,查询优化是一个持续的过程,需要根据数据增长和业务变化不断调整优化策略。

本站由 提供部署服务