1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159
| #!/bin/bash
# Analyze a web server access log and write several plain-text reports.
#
# Arguments:
#   $1 - path to the access log. The parsing below expects each line to start
#        with the client IPv4 address, to carry a "[date:HH:MM:SS ...]"
#        timestamp, and to have the HTTP status code in whitespace-separated
#        field 9.
#   $2 - output directory (optional, default: ./analysis); created if missing.
#
# Files written inside the output directory:
#   ip_analysis.txt      request count per client IP, busiest first
#   status_analysis.txt  request count per HTTP status code, most common first
#   time_analysis.txt    requests per hour of day, then per date+hour
#   summary_report.txt   combined overview of the three reports above
#
# Returns:
#   0 on success; 1 if the log file is not a readable regular file or the
#   output directory cannot be created.
analyze_web_logs() {
    local log_file="$1"
    local output_dir="${2:-./analysis}"

    # The log is read several times below, so it must be a readable regular file.
    if [[ ! -f "$log_file" || ! -r "$log_file" ]]; then
        echo "错误:无法读取日志文件:$log_file" >&2
        return 1
    fi
    if ! mkdir -p "$output_dir"; then
        echo "错误:无法创建输出目录:$output_dir" >&2
        return 1
    fi

    local ip_report="$output_dir/ip_analysis.txt"
    local status_report="$output_dir/status_analysis.txt"
    local time_report="$output_dir/time_analysis.txt"
    local summary_report="$output_dir/summary_report.txt"

    echo "开始分析Web日志:$log_file"

    echo "分析IP访问模式..."
    # Count requests per leading IPv4 address and label each by volume.
    grep -o -E "^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}" "$log_file" |
        sort | uniq -c | sort -nr |
        awk '{
            if ($1 > 1000) level = "高频";
            else if ($1 > 100) level = "中频";
            else level = "低频";
            printf "%-15s %6d次 [%s]\n", $2, $1, level;
        }' > "$ip_report"

    echo "分析状态码分布..."
    # Field 9 is taken as the status code; anything that is not exactly three
    # digits is dropped before counting.
    awk '{print $9}' "$log_file" |
        grep -E "^[0-9]{3}$" |
        sort | uniq -c | sort -nr |
        awk '{
            if ($2 ~ /^2/) status = "成功";
            else if ($2 ~ /^3/) status = "重定向";
            else if ($2 ~ /^4/) status = "客户端错误";
            else if ($2 ~ /^5/) status = "服务器错误";
            else status = "其他";
            printf "%-3s %-12s %6d次\n", $2, status, $1;
        }' > "$status_report"

    echo "分析访问时间模式..."
    # Extract "date hour" from the first [...] on each line. Anchoring on the
    # first bracket keeps a "[" inside the request or user agent from being
    # mistaken for the timestamp. awk writes to stdout and the shell performs
    # the redirection, so the output path is never spliced into awk source.
    sed -n 's/^[^[]*\[\([^:]*\):\([0-9]\{2\}\):.*/\1 \2/p' "$log_file" |
        awk '{
            key = $1 " " $2;
            # Remember first-seen order so the detail section is deterministic
            # (and chronological when the log itself is).
            if (!(key in date_hour)) order[++n] = key;
            date_hour[key]++;
            hour[$2]++;
        }
        END {
            print "=== 每小时访问统计 ===";
            for (h = 0; h < 24; h++) {
                printf "%02d:00 %6d次\n", h, hour[sprintf("%02d", h)];
            }
            print "\n=== 每日每小时详细统计 ===";
            for (i = 1; i <= n; i++) {
                printf "%-20s %6d次\n", order[i], date_hour[order[i]];
            }
        }' > "$time_report"

    echo "生成综合报告..."
    {
        echo "=== Web日志分析报告 ==="
        echo "分析时间:$(date)"
        echo "日志文件:$log_file"
        # NR also counts a final line that lacks a trailing newline.
        echo "总访问量:$(awk 'END { print NR }' "$log_file")"
        echo ""
        echo "=== TOP 10 访问IP ==="
        head -10 "$ip_report"
        echo ""
        echo "=== 状态码分布 ==="
        cat "$status_report"
        echo ""
        echo "=== 访问高峰时段 ==="
        # Peak hours: take the hourly lines, drop idle hours, and keep the five
        # busiest (ties broken by hour of day).
        grep -E '^[0-9]{2}:00 ' "$time_report" |
            awk '$2 + 0 > 0' |
            sort -k2,2nr -k1,1 |
            head -5
    } > "$summary_report"

    echo "分析完成,结果保存在:$output_dir"
}
# Print salary and bonus statistics for an employee CSV file.
#
# Arguments:
#   $1 - path to the CSV file. Blank lines are ignored and the first non-blank
#        line is treated as a header and skipped. A data row is accepted when
#        it has at least four comma-separated fields and fields 3 (salary) and
#        4 (bonus) are unsigned integers; field 2 is used as the department.
#
# Output:
#   stdout - overall totals and averages, salary-band counts, and per-department
#            averages (departments listed in order of first appearance).
#   stderr - one "无效数据行:" line for every rejected row.
#
# Returns:
#   0 on success; 1 if the file is not a readable regular file or contains no
#   valid data rows.
process_sales_data() {
    local csv_file="$1"

    if [[ ! -f "$csv_file" || ! -r "$csv_file" ]]; then
        echo "错误:无法读取CSV文件:$csv_file" >&2
        return 1
    fi

    echo "=== 销售数据分析 ==="
    echo "数据清洗中..."

    # Validation and aggregation run in a single awk pass, so no scratch file
    # is created in the current directory.
    awk -F',' '
        # Tolerate CRLF line endings; a trailing CR would otherwise make the
        # numeric check on the last field fail for every row.
        { sub(/\r$/, "") }

        # Skip blank lines, then skip the header (first non-blank line).
        /^$/ { next }
        !header_seen { header_seen = 1; next }

        # Validate row completeness; report and skip anything malformed.
        !(NF >= 4 && $3 ~ /^[0-9]+$/ && $4 ~ /^[0-9]+$/) {
            print "无效数据行:" $0 > "/dev/stderr";
            next;
        }

        {
            # Per-department statistics, remembering first-seen order so the
            # report is deterministic.
            if (!($2 in dept_count)) dept_order[++dept_total] = $2;
            dept_count[$2]++;
            dept_salary[$2] += $3;
            dept_bonus[$2] += $4;

            # Overall statistics.
            total_salary += $3;
            total_bonus += $4;
            total_count++;

            # Salary bands: below 4000, 4000 to 4999, 5000 and above.
            if ($3 < 4000) low_salary++;
            else if ($3 < 5000) mid_salary++;
            else high_salary++;
        }

        END {
            # Without this guard the averages below divide by zero.
            if (total_count == 0) {
                print "没有有效的数据行" > "/dev/stderr";
                exit 1;
            }

            print "=== 总体统计 ===";
            printf "员工总数:%d\n", total_count;
            printf "平均薪资:%.2f\n", total_salary / total_count;
            printf "平均奖金:%.2f\n", total_bonus / total_count;
            printf "薪资总额:%d\n", total_salary;
            printf "奖金总额:%d\n", total_bonus;

            print "\n=== 薪资分布 ===";
            printf "低薪(<4000):%d人\n", low_salary;
            printf "中薪(4000-5000):%d人\n", mid_salary;
            # A salary of exactly 5000 falls in this band, hence ">=".
            printf "高薪(>=5000):%d人\n", high_salary;

            print "\n=== 部门统计 ===";
            for (i = 1; i <= dept_total; i++) {
                dept = dept_order[i];
                printf "%-10s: %2d人, 平均薪资:%.2f, 平均奖金:%.2f\n",
                    dept, dept_count[dept],
                    dept_salary[dept] / dept_count[dept],
                    dept_bonus[dept] / dept_count[dept];
            }
        }
    ' "$csv_file"
}
# Print the command-line usage text to stdout.
_print_usage() {
    echo "用法:"
    echo " $0 web <log_file> [output_dir] - 分析Web日志"
    echo " $0 sales <csv_file> - 分析销售数据"
}

# Command-line dispatcher.
#
# Arguments:
#   $1 - sub-command: "web" or "sales" (or -h/--help for usage)
#   $2 - input file for the chosen sub-command
#   $3 - output directory (web only, optional)
#
# Returns:
#   The status of the selected sub-command; 0 after an explicit help request;
#   1 (with usage on stderr) for an unknown sub-command or a missing file
#   argument, so calling scripts can detect misuse.
main() {
    case "$1" in
        "web" | "sales")
            # Both sub-commands need an input file; fail here with usage
            # rather than letting the worker run on an empty path.
            if [[ -z "$2" ]]; then
                _print_usage >&2
                return 1
            fi
            if [[ "$1" == "web" ]]; then
                analyze_web_logs "$2" "$3"
            else
                process_sales_data "$2"
            fi
            ;;
        "-h" | "--help")
            _print_usage
            ;;
        *)
            _print_usage >&2
            return 1
            ;;
    esac
}
# Script entry point: forward all command-line arguments to main. As the last
# command in the script, its status becomes the exit status of the script.
main "$@"
|