awk qos分析脚本两例。

之前写的用于分析webcdn日志的awk脚本。一个可以用来分析流量和状态码。另一个用来分析错误码。

使用方式:

zcat  /.log.gz |awk -f  analyze_awk.awk -  #domain和traffic相关分析,要注意日志格式

zcat  /.log.gz |awk -f  auto_awk.awk -   #wrong http code分析

(目前我们使用监控系统来直接调用脚本,在发现问题时可以直接分析错误的domain和url,方便快速定位问题)

有兴趣的同学测试下,欢迎大家共同学习和探讨。

脚本1:

# Per-record rule: count error responses (4xx/5xx, excluding 400) by status,
# by domain, by (domain, status), by full URL and by (URL, status).
# Fields read from each log line: $2 = domain, $7 = URL path, $9 = HTTP status.
{
    # Strip every double quote from the record; modifying $0 makes awk
    # re-split the fields, so quoted values no longer carry quote characters.
    gsub(/"/, "");

    domain = $2;
    url    = $7;
    status = $9;

    # Anchored three-digit match so that only a real 4xx/5xx code qualifies
    # (an unanchored "4.." would also match e.g. a byte count like 14523).
    if (status ~ /^[45][0-9][0-9]$/ && status != 400) {
        full_url = "http://" domain url;

        ++arr_status_num[status];
        ++arr_domain_num[domain];
        ++arr_domain_status[domain, status];
        ++arr_domain_url_num[full_url];
        ++arr_domain_url_status[full_url, status];
    }
}
# END rule: print three reports built from the counters filled per record:
#   1. count per HTTP status code,
#   2. top 20 domains by error count,
#   3. top 20 URLs by error count.
# Requires gawk (asorti). Reports 2 and 3 are piped through sort/head.
END {
    # ---- 1. HTTP status summary: one "code:count" line per status ----
    print "\033[40;33m#################################\033[0m"
    print "\033[40;32m#get http code summary: code,sum#\033[0m"
    print "\033[40;33m#################################\033[0m"

    status_sort = asorti(arr_status_num, sort_status);
    for (i = 1; i <= status_sort; i++) {
        s = sort_status[i];
        # 4xx and 5xx are printed in the same "code:count" shape.
        if (s ~ "4.." || s ~ "5..") {
            printf "%s:%d\n", s, arr_status_num[s];
        }
    }

    # ---- 2. Domain summary ----
    print "==================================================="
    print "\033[40;33m###############################################################\033[0m"
    print "\033[40;32m#get domain  summary: domain|error_sum|4xx|5xx|404|499|502|504#\033[0m"
    print "\033[40;33m###############################################################\033[0m"

    # The same command string must be used for the pipe and for close(),
    # so keep it in one variable.
    domain_cmd = "sort -t '|' -k 2 -nr|head -20";

    domain_sort = asorti(arr_domain_num, sort_domain);
    for (i = 1; i <= domain_sort; i++) {
        g = sort_domain[i];

        # NOTE: the 4xx column sums only 404/499/415/403 and the 5xx column
        # only 502/504; other codes are not included in these totals.
        arr_domain_num_4xx = arr_domain_status[g,404] + arr_domain_status[g,499] + arr_domain_status[g,415] + arr_domain_status[g,403];
        arr_domain_num_5xx = arr_domain_status[g,502] + arr_domain_status[g,504];
        arr_domain_num_error = arr_domain_num_4xx + arr_domain_num_5xx;

        printf "%s|%d|%d|%d|%d|%d|%d|%d\n",
            g, arr_domain_num_error, arr_domain_num_4xx, arr_domain_num_5xx,
            arr_domain_status[g,404], arr_domain_status[g,499],
            arr_domain_status[g,502], arr_domain_status[g,504] | domain_cmd
    }
    # Closing the pipe lets sort/head finish before the next header is printed.
    close(domain_cmd)

    # ---- 3. URL summary ----
    print "==================================================="
    print "\033[40;33m################################################################\033[0m"
    print "\033[40;32m#get url summary: url | error_sum| 4xx| 5xx| 404| 499| 502| 504#\033[0m"
    print "\033[40;33m################################################################\033[0m"

    url_cmd = "sort -t ' ' -k 3 -nr|head -20";

    url_sort = asorti(arr_domain_url_num, url_domain);
    for (i = 1; i <= url_sort; i++) {
        g = url_domain[i];

        arr_domain_url_num_4xx = arr_domain_url_status[g,404] + arr_domain_url_status[g,499] + arr_domain_url_status[g,403] + arr_domain_url_status[g,415];
        arr_domain_url_num_5xx = arr_domain_url_status[g,502] + arr_domain_url_status[g,504];
        arr_domain_url_num_error = arr_domain_url_num_4xx + arr_domain_url_num_5xx;

        printf "%s | %d| %d| %d| %d| %d| %d| %d\n",
            g, arr_domain_url_num_error, arr_domain_url_num_4xx, arr_domain_url_num_5xx,
            arr_domain_url_status[g,404], arr_domain_url_status[g,499],
            arr_domain_url_status[g,502], arr_domain_url_status[g,504] | url_cmd
    }
    close(url_cmd)
}

脚本2:

# Set the output field separator before any input is read.
BEGIN {
    OFS = ";"
}
# Per-record rule: accumulate request counts, bytes and response time by
# status, domain, URL and client; for 200/206 responses also classify the
# response time into latency buckets.
# Fields read from each log line: $1 = client, $2 = domain, $7 = URL path,
# $9 = HTTP status, $10 = bytes, $NF = response time.
{
    # Strip every double quote from the record; modifying $0 makes awk
    # re-split the fields, so quoted values no longer carry quote characters.
    gsub(/"/, "");

    client = $1;
    domain = $2;
    url    = $7;
    status = $9;
    bytes  = $10;
    time   = $NF;

    # Only records whose status field is exactly three digits are counted;
    # this keeps misaligned lines (non-numeric or longer fields) out of the totals.
    if (status > 0 && status ~ /^[0-9][0-9][0-9]$/) {
        full_url = "http://" domain url;

        sum_access++;
        ++arr_status_num[status];
        arr_status_bytes[status] += bytes;
        arr_status_time[status]  += time;

        ++arr_domain_num[domain];
        ++arr_domain_status[domain, status];
        ++arr_domain_url_num[full_url];
        ++arr_domain_url_status[full_url, status];

        if (status == 200 || status == 206) {
            # about domain
            arr_domain_bytes[domain] += bytes;
            arr_domain_time[domain]  += time;
            # about client (usually an IP)
            arr_client_bytes[client] += bytes;
            arr_client_time[client]  += time;

            # Latency buckets. An else-if chain guarantees each request lands
            # in exactly one bucket; a value sitting on a boundary (0.1, 0.2,
            # 0.5, 1, 2, 5) goes to the lower bucket instead of being counted twice.
            if (time <= 0.1) {
                ++arr_status_speed1[status];  ++arr_domain_speed1[domain];
            } else if (time <= 0.2) {
                ++arr_status_speed2[status];  ++arr_domain_speed2[domain];
            } else if (time <= 0.5) {
                ++arr_status_speed5[status];  ++arr_domain_speed5[domain];
            } else if (time <= 1) {
                ++arr_status_speed10[status]; ++arr_domain_speed10[domain];
            } else if (time <= 2) {
                ++arr_status_speed20[status]; ++arr_domain_speed20[domain];
            } else if (time <= 5) {
                ++arr_status_speed50[status]; ++arr_domain_speed50[domain];
            } else {
                ++arr_status_speed5x[status]; ++arr_domain_speed5x[domain];
            }
        }
    }
}
# END rule: print four reports built from the counters filled per record:
#   1. per-status summary (with throughput and latency distribution for 200/206),
#   2. top 20 domains,
#   3. top 20 URLs by request count,
#   4. top 10 URLs by error rate.
# Requires gawk (asorti). Reports 2-4 are piped through sort/head.
END {
    # ---- 1. HTTP status summary ----
    print "\033[40;33m##########################################################################################################################\033[0m"
    print "\033[40;32m#get http code summary: code,sum,rate,speed(Kb),avg_time(s),<0.1 rate,<0.2 rate,<0.5 rate,<1 rate,<2 rate,<5 rate,>5 rate#\033[0m"
    print "\033[40;33m##########################################################################################################################\033[0m"

    status_sort = asorti(arr_status_num, sort_status);
    for (i = 1; i <= status_sort; i++) {
        s = sort_status[i];
        n = arr_status_num[s];

        if (s == 200 || s == 206) {
            t = arr_status_time[s];
            # Guard: a zero total time would make gawk abort with a
            # division-by-zero error; report a speed of 0 in that case.
            speed_kb = (t > 0) ? arr_status_bytes[s] * 8 / (1024 * t) : 0;

            # Seven latency columns, matching the header above
            # (the ">5" column is included as the last value).
            printf "%s:%d,%.2f|%.2f|%.3f|%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f\n",
                s, n, n / sum_access, speed_kb, t / n,
                arr_status_speed1[s]  / n * 100,
                arr_status_speed2[s]  / n * 100,
                arr_status_speed5[s]  / n * 100,
                arr_status_speed10[s] / n * 100,
                arr_status_speed20[s] / n * 100,
                arr_status_speed50[s] / n * 100,
                arr_status_speed5x[s] / n * 100;
        } else if (s ~ "3.." || s ~ "4.." || s ~ "5..") {
            # 3xx, 4xx and 5xx share the same "code:count,rate" shape.
            printf "%s:%d,%.2f\n", s, n, n / sum_access;
        }
    }

    # ---- 2. Domain summary ----
    print "==================================================="
    print "\033[40;33m###########################################################################\033[0m"
    print "\033[40;32m#get domain  summary: domain,sum,rate,valid rate,4xx rate,5xx rate,data(G)#\033[0m"
    print "\033[40;33m###########################################################################\033[0m"

    # The same command string must be used for the pipe and for close().
    domain_cmd = "sort -t ',' -k 2 -nr|head -20";

    domain_sort = asorti(arr_domain_num, sort_domain);
    for (i = 1; i <= domain_sort; i++) {
        g = sort_domain[i];
        n = arr_domain_num[g];

        # NOTE: "valid" sums 200/206/302/301, "4xx" sums 404/499/403/415 and
        # "5xx" sums 502/504; other codes are not included in these totals.
        arr_domain_num_valid = arr_domain_status[g,200] + arr_domain_status[g,206] + arr_domain_status[g,302] + arr_domain_status[g,301];
        arr_domain_num_4xx   = arr_domain_status[g,404] + arr_domain_status[g,499] + arr_domain_status[g,403] + arr_domain_status[g,415];
        arr_domain_num_5xx   = arr_domain_status[g,502] + arr_domain_status[g,504];

        # Only domains with a positive accumulated 200/206 response time are listed.
        if (arr_domain_time[g] > 0) {
            printf "%s:%d,%.2f|%.2f,%.2f,%.2f,%.2f\n",
                g, n, n / sum_access,
                arr_domain_num_valid / n,
                arr_domain_num_4xx / n,
                arr_domain_num_5xx / n,
                arr_domain_bytes[g] * 8 / (1024 * 1024 * 1024) | domain_cmd
        }
    }
    # Closing the pipe lets sort/head finish before the next header is printed.
    close(domain_cmd)

    # ---- 3. URL summary ordered by request count ----
    print "==================================================="
    print "\033[40;33m###########################################################\033[0m"
    print "\033[40;32m#get url  summary:url| sum| valid rate| 4xx rate |5xx rate#\033[0m"
    print "\033[40;33m###########################################################\033[0m"

    url_sum_cmd = "sort -t ' ' -k 2 -nr|head -20";

    url_sort = asorti(arr_domain_url_num, url_domain);
    for (i = 1; i <= url_sort; i++) {
        g = url_domain[i];
        n = arr_domain_url_num[g];

        arr_domain_url_num_valid = arr_domain_url_status[g,200] + arr_domain_url_status[g,206] + arr_domain_url_status[g,302] + arr_domain_url_status[g,301];
        arr_domain_url_num_4xx   = arr_domain_url_status[g,404] + arr_domain_url_status[g,499] + arr_domain_url_status[g,403] + arr_domain_url_status[g,415];
        arr_domain_url_num_5xx   = arr_domain_url_status[g,502] + arr_domain_url_status[g,504];

        printf "%s| %d| %.2f| %.2f| %.2f\n",
            g, n,
            arr_domain_url_num_valid / n,
            arr_domain_url_num_4xx / n,
            arr_domain_url_num_5xx / n | url_sum_cmd
    }
    close(url_sum_cmd)

    # ---- 4. URL summary ordered by error rate ----
    print "==================================================="
    print "\033[40;33m##########################################################################\033[0m"
    print "\033[40;32m#get url  summary:url| sum| valid rate| errorsum rate| 4xx rate| 5xx rate#\033[0m"
    print "\033[40;33m##########################################################################\033[0m"

    url_err_cmd = "sort -t ' ' -k 4 -nr|head -10";

    # url_domain / url_sort from the previous report are reused; the URL
    # counters have not changed since they were built.
    for (i = 1; i <= url_sort; i++) {
        g = url_domain[i];
        n = arr_domain_url_num[g];

        arr_domain_url_num_valid = arr_domain_url_status[g,200] + arr_domain_url_status[g,206] + arr_domain_url_status[g,302] + arr_domain_url_status[g,301];
        arr_domain_url_num_4xx   = arr_domain_url_status[g,404] + arr_domain_url_status[g,499] + arr_domain_url_status[g,403] + arr_domain_url_status[g,415];
        arr_domain_url_num_5xx   = arr_domain_url_status[g,502] + arr_domain_url_status[g,504];

        printf "%s| %d| %.2f| %.2f| %.2f| %.2f\n",
            g, n,
            arr_domain_url_num_valid / n,
            (arr_domain_url_num_4xx + arr_domain_url_num_5xx) / n,
            arr_domain_url_num_4xx / n,
            arr_domain_url_num_5xx / n | url_err_cmd
    }
    close(url_err_cmd)
}