awk 工作模式
与 sed 相同, 都是逐行处理
语法格式
- awk 'BEGIN{}pattern{commands}END{}' filename
- stdout | awk 'BEGIN{}pattern{commands}END{}'
语法格式 | 说明 |
---|---|
BEGIN{} | 处理文本前执行 |
pattern | 匹配模式 |
{commands} | 处理命令, ; 隔开 |
END{} | 处理文本后执行 |
BEGIN{}, pattern, END{} 都可省略
内置变量
内置变量 | 含义 |
---|---|
$0 | 整行内容 |
$1-$n | 按分隔符的第 1-n 个字段 |
NF (Number of Fields) | 当前行的字段个数(多少列) |
NR (Number of Records) | 当前行行号, 从 1 开始计数 |
FNR (File Number of Records) | 多文件处理时, 每个文件单独计数, 从 1 开始 |
FS (Field Separator) | 输入字段分隔符, 不指定为空格或 tab |
RS (Record Separator) | 输入行分隔符, 默认换行符 |
OFS (Output Field Separator) | 输出字段分隔符, 默认空格 |
ORS (Output Record Separator) | 输出行分隔符, 默认换行符 |
FILENAME | 当前输入文件名 |
ARGC | 命令行参数个数 |
ARGV | 命令行参数数组 |
基本使用:
$ awk '{print $0}' passwd
$ awk '{print $1,$3}' list
$ awk '{print NF}' list
$ awk '{print NR}' list
$ awk '{print FNR}' list awk.txt
$ awk 'BEGIN{FS=":"}{print $1}' passwd
$ awk 'BEGIN{RS="--"}{print $0}' list
$ awk 'BEGIN{FS=":";OFS="|"}{print $1,$3}' passwd # 每行输出字段
$ awk 'BEGIN{ORS="--"}{print $0}' passwd
$ awk '{print FILENAME}' passwd
$ awk '{print ARGC}' passwd list # 3 个参数 awk, passwd, list
格式化输出 printf
格式符 | 含义 |
---|---|
%s | 字符串 |
%d | 十进制 |
%f | 浮点数 |
%x | 十六进制 |
%o | 八进制 |
%e | 科学计数法 |
%c | 单个字符 |
修饰符:
修饰符 | 含义 |
---|---|
- | 左对齐 |
+ | 显示数值的正负号 (指定宽度时默认就是右对齐) |
# | 打印 十六进制与八进制时使用, 在前打印进制标识 |
示例
# %s: 默认左对齐
# %10s: 默认右对齐
$ awk 'BEGIN{FS=":"}{printf "%s",$7}' passwd
$ awk 'BEGIN{FS=":"}{printf "%10s",$7}' passwd
$ awk 'BEGIN{FS=":"}{printf "%-10s",$7}' passwd
$ awk 'BEGIN{FS=":"}{printf "%d",$3}' passwd
$ awk 'BEGIN{FS=":"}{printf "%0.3f",$3}' passwd
$ awk 'BEGIN{FS=":"}{printf "%x",$3}' passwd
$ awk 'BEGIN{FS=":"}{printf "%#x",$3}' passwd # 显示 16 进制标识
$ awk 'BEGIN{FS=":"}{printf "%o",$3}' passwd
$ awk 'BEGIN{FS=":"}{printf "%e",$3}' passwd
模式匹配
- 正则表达式 (固定写法 //)
- 关系运算匹配
正则表达式:
# 含有 root 的行
$ awk 'BEGIN{FS=":"}/root/{print $0}' passwd
# 以 nginx 开头
$ awk 'BEGIN{FS=":"}/^nginx/{print $0}' passwd
关系运算符:
关系运算符 | 含义 |
---|---|
< 数值 | 小于 |
> 数值 | 大于 |
<= 数值 | 小于等于 |
>= 数值 | 大于等于 |
== | 等于 |
!= | 不等于 |
~ | 匹配正则 |
!~ | 不匹配正则 |
# 第 3 个字段小于 50
$ awk 'BEGIN{FS=":"}$3<50{print $0}' passwd
# 第 7 个字段为 /bin/bash
$ awk 'BEGIN{FS=":"}$7=="/bin/bash"{print $0}' passwd
$ awk 'BEGIN{FS=":"}$7!="/bin/bash"{print $0}' passwd
# 第 3 个字段包含 3 个及以上数字
$ awk 'BEGIN{FS=":"}$3~/[0-9]{3,}/{print $0}' passwd
逻辑运算符:
逻辑运算符 | 含义 |
---|---|
|| | 或 |
&& | 与 |
! | 非 |
$ awk 'BEGIN{FS=":"}$1=="root"||$1=="nginx"{print $0}' passwd
$ awk 'BEGIN{FS=":"}$3<50 && $3 >30 {print $0}' passwd
awk 算数运算
运算符 | 含义 |
---|---|
+ | 加 |
- | 减 |
* | 乘 |
/ | 除 |
% | 取余 |
^ 或 ** | 乘方 |
x++ ; x-- | 先返回 x, 后 +/- x |
++x ; --x | 先 +/- x, 后 返回 x |
练习计算课程平均值
# 右对齐
$ awk 'BEGIN{printf "%10s%10s%10s%10s%10s\n","Name", "YuWen","ShuXue", "English", "AVG"}{total=$2+$3+$4;AVG=total/3;printf "%10s%10d%10d%10d%10.2f\n",$1,$2,$3,$4,AVG}' list
$ awk 'BEGIN{printf "%-10s%-10s%-10s%-10s%-10s\n","Name", "YuWen","ShuXue", "English", "AVG"}{total=$2+$3+$4;AVG=total/3;printf "%-10s%-10d%-10d%-10d%-10.2f\n",$1,$2,$3,$4,AVG}' list
条件语句
if-else
示例: script.awk
# Classify every passwd entry by its UID (field 3) into one of three bands
# and print the band label, the user name (field 1) and the UID.
BEGIN {
    FS = ":"    # passwd fields are colon-separated
}
{
    # The bands are contiguous, so the boundary values 50 and 100 land in a
    # band whose label actually describes them.
    if ($3 < 50) {
        printf "%-20s%-10s%10d\n", "UID<50", $1, $3
    } else if ($3 < 100) {
        printf "%-20s%-10s%10d\n", "50<=UID<100", $1, $3
    } else {
        printf "%-20s%-10s%10d\n", "UID>=100", $1, $3
    }
}
$ awk -f script.awk /etc/passwd
循环语句
- do-while
- while
- for
计算 1+2+…100
- do-while
# Add up the integers up to 100 with a do-while loop: the body runs once
# before the condition is checked for the first time.
BEGIN {
    # n and acc are never assigned beforehand; awk treats them as 0 here.
    do {
        acc = acc + n
        n += 1
    } while (n <= 100)
    print acc
}
- while
# Add up the integers up to 100 with a while loop: the condition is checked
# before every pass through the body.
BEGIN {
    # n and acc are never assigned beforehand; awk treats them as 0 here.
    while (n <= 100) {
        acc = acc + n
        n += 1
    }
    print acc
}
- for
# Add up the integers 0..100 with a for loop and print the result.
BEGIN {
    for (k = 0; k <= 100; k += 1)
        acc = acc + k
    print acc
}
练习: 打印平均分大于 70 的记录, 并计算这些记录各科的平均分
# Print every row whose three-subject average is above 70, then a summary
# row holding the per-column means of the rows that were printed.
# Fields used: $1 name, $2 YuWen, $3 Math, $4 English (see the header row).
BEGIN {
    printf "%-10s%-10s%-10s%-10s%-10s\n", "Name", "YuWen", "Math", "English", "AVG"
}
{
    # All three subject columns go into the average (the English column,
    # $4, was previously left out and Math was counted twice).
    total = $2 + $3 + $4
    avg = total / 3
    if (avg > 70) {
        printf "%-10s%-10d%-10d%-10d%-0.2f\n", $1, $2, $3, $4, avg
        score_yuwen += $2
        score_math += $3
        score_english += $4
        score_avg += avg
        count++
    }
}
END {
    # With no qualifying row, count is 0 and the divisions below would abort
    # awk with a division-by-zero error, so the summary row is skipped.
    if (count > 0) {
        printf "%-10s%-10.2f%-10.2f%-10.2f%-0.2f\n", "", score_yuwen / count, score_math / count, score_english / count, score_avg / count
    }
}
字符串函数
函数名 | 解释 | 返回值 |
---|---|---|
length(str) | 计算字符串长度 | 长度值 |
index(str1,str2) | 在 str1 中查找 str2 | 返回位置索引, 从 1 计数 |
tolower(str) | 转小写 | 转小写后的字符串 |
toupper(str) | 转大写 | 转大写后的字符串 |
substr(str,m,n) | 从 str 的第 m 个字符开始, 截取 n 个字符(n 可省略, 省略则截取到末尾) | 截取后的子串 |
split(str,arr,fs) | 按 fs 切割字符串, 结果保存到 arr | 切割后的子串个数 |
match(str,RE) | 与 index() 类似, 但支持正则(RE) | 返回索引位置 |
sub(RE,RepStr,str) | 在 str 中搜索符合 RE 的子串将其替换为 RepStr; 只替换第一个 | 替换个数 |
gsub(RE,RepStr,str) | 在 str 中搜索符合 RE 的子串将其替换为 RepStr; 替换全部 | 替换个数 |
1. 打印 passwd 每个字段长度:
# For every input line, print the length of each colon-separated field,
# joining the lengths with ":" (no trailing colon after the last field).
BEGIN { FS = ":" }
{
    for (col = 1; col <= NF; col++) {
        # The last field gets the format without the trailing separator.
        fmt = (col == NF) ? "%d" : "%d:"
        printf fmt, length($col)
    }
    print ""    # terminate the output line
}
2. 查询"I have a dream"中"ea"索引
$ awk 'BEGIN{str="I have a dream";localtion=index(str,"ea");print localtion}'
# 12
$ awk 'BEGIN{str="I have a dream";localtion=match(str,"ea");print localtion}'
# 12
3. 大小写转换
$ awk 'BEGIN{str="I have a dream";print tolower(str)}'
# i have a dream
$ awk 'BEGIN{str="I have a dream";print toupper(str)}'
# I HAVE A DREAM
4. 切分数组
$ awk 'BEGIN{str="I have a dream";split(str,arr," ");print arr[2]}'
$ awk 'BEGIN{str="I have a dream";split(str,arr);print arr[2]}' # 默认空格分隔
# have
# 遍历, 不是顺序遍历
$ awk 'BEGIN{str="I have a dream";split(str,arr);for(a in arr) print arr[a]}'
# dream
# I
# have
# a
5. 搜索第一个出现的数字
# 正则常量写在 // 中 (match 也接受字符串形式的正则, 如上例的 "ea")
$ awk 'BEGIN{str="I have a 123 dream"; print match(str, /[0-9]/)}'
# 10
6. 截取子串
$ awk 'BEGIN{str="I have a 123 dream"; print substr(str,3,7)}'
# have a
$ awk 'BEGIN{str="I have a 123 dream"; print substr(str,3)}'
# have a 123 dream
7. 替换数字
$ awk 'BEGIN{str="I have a 123 dream 324 hello"; print sub(/[0-9]+/,"$",str); print str}'
# 1
# I have a $ dream 324 hello
$ awk 'BEGIN{str="I have a 123 dream 324 hello"; print gsub(/[0-9]+/,"$",str); print str}'
# 2
# I have a $ dream $ hello
awk 常用选项
选项 | 说明 |
---|---|
-v | 参数传递 |
-f | 指定脚本文件 |
-F | 指定分隔符 |
-V | 查看版本 |
如果变量值中有空格, 要使用 "" 将变量引起来
$ num=13
$ var="hello world"
$ awk -v num1=$num -v var1=$var 'BEGIN{print num1,var1}'
# awk: fatal: cannot open file `BEGIN{print num1,var1}' for reading (No such file or directory)
$ awk -v num1="$num" -v var1="$var" 'BEGIN{print num1,var1}'
# 13 hello world
$ awk -F ":" '{print $0}' /etc/passwd
awk 与 shell 中数组
shell 中数组
下标从 0 开始
打印数组:
$ arr=("kubernetes" "etcd" "time" "redis")
# 打印数组
$ echo ${arr[@]}
$ echo ${arr[*]}
# kubernetes etcd time redis
# 打印元素
$ echo ${arr[2]}
# time
打印数组/元素长度; 分片访问; 元素操作; 删除元素:
$ arr=("kubernetes" "etcd" "time" "redis")
# 打印数组
$ echo ${#arr[@]}
$ echo ${#arr[*]}
# 4
# 打印元素
$ echo ${#arr[3]}
# 5
# 分片访问
$ echo ${arr[@]:1:3}
# etcd time redis
# 元素赋值
$ arr[2]=mysqlserver
$ echo ${arr[@]}
# kubernetes etcd mysqlserver redis
# 元素内容替换
$ echo ${arr[@]/e/E}
# kubErnetes Etcd mysqlsErver rEdis
$ echo ${arr[@]//e/E}
# kubErnEtEs Etcd mysqlsErvEr rEdis
# 元素删除 *** 通过下标删除后, 被删除的下标的元素为空, 原数组的其他元素下标不变
$ unset arr[0]
$ echo ${arr[@]}
# etcd mysqlserver redis
$ unset arr[0]
$ echo ${arr[@]}
# etcd mysqlserver redis
$ unset arr[1]
$ echo ${arr[@]}
# mysqlserver redis
# 删除数组
$ unset arr
通过下标删除后, 被删除的下标的元素为空, 原数组的其他元素下标不变
数组的遍历
$ arr=("kubernetes" "etcd" "time" "redis")
$ for a in ${arr[@]}; do echo $a; done
# kubernetes
# etcd
# time
# redis
awk 中数组
脚本练习
数据生成脚本:
#!/bin/bash
#
#######################################
# Print a pseudo-random integer in the inclusive range [min, max].
# The value is derived from the clock, so it is only good for generating
# test data, not for anything security-related.
# Arguments: $1 - lower bound (min)
#            $2 - upper bound (max)
# Outputs:   the chosen integer on stdout
# Returns:   0
#######################################
create_random() {
  local lo=$1
  local hi=$2
  local span=$(( hi - lo + 1 ))
  local seed

  # An empty range (max < min) would make the modulo below divide by zero or
  # by a negative number; print the upper bound so the result never exceeds it.
  if (( span <= 0 )); then
    printf '%s\n' "$hi"
    return 0
  fi

  seed=$(date +%s%N)
  # %N (nanoseconds) is a GNU date extension; elsewhere it is emitted
  # literally, so fall back to seconds mixed with $RANDOM.
  if [[ ! "$seed" =~ ^[0-9]+$ ]]; then
    seed=$(( $(date +%s) * 32768 + RANDOM ))
  fi

  printf '%s\n' "$(( seed % span + lo ))"
}
# Append fake batch-insert log lines to ./db.log.<YYYYMMDD>, cycling through
# a fixed list of users. Each line records a random total (COUNT) split into
# a successful part (NUM1) and a failed part (NUM2).
# The outer loop never terminates on its own: stop the script with Ctrl-C.
INDEX=1
while true; do
  for user in allen mike jerry tracy han lilei; do
    COUNT=$RANDOM
    # $RANDOM can be 0; there is then nothing to split, and asking for a
    # random value in [1, 0] is meaningless, so record zero successes.
    if (( COUNT > 0 )); then
      NUM1=$(create_random 1 "$COUNT")
    else
      NUM1=0
    fi
    NUM2=$(( COUNT - NUM1 ))
    printf '%s %s Batches: user %s insert %s records into database:product table:detail, insert %s records successfully, failed %s records\n' \
      "$(date '+%Y-%m-%d %H:%M:%S')" "$INDEX" "$user" "$COUNT" "$NUM1" "$NUM2" \
      >> "./db.log.$(date +%Y%m%d)"
    INDEX=$(( INDEX + 1 ))
  done
done
数据格式
2023-12-12 02:49:31 1 Batches: user allen insert 25719 records into database:product table:detail, insert 24482 records successfully, failed 1237 records
2023-12-12 02:49:31 2 Batches: user mike insert 32653 records into database:product table:detail, insert 26055 records successfully, failed 6598 records
2023-12-12 02:49:31 3 Batches: user jerry insert 16986 records into database:product table:detail, insert 11636 records successfully, failed 5350 records
2023-12-12 02:49:31 4 Batches: user tracy insert 31899 records into database:product table:detail, insert 9250 records successfully, failed 22649 records
2023-12-12 02:49:31 5 Batches: user han insert 24256 records into database:product table:detail, insert 24033 records successfully, failed 223 records
1. 统计每个用户以及全部用户的总记录数, 成功记录数, 失败记录数
count.awk:
# Aggregate the generated batch log per user and print one row per user
# followed by a grand-total row.
# Fields used: $6 user name, $8 records requested, $14 records inserted
# successfully, $18 records failed.
BEGIN {
    printf "%-10s%-20s%-20s%-20s\n", "User", "Total", "Sucess", "Failed"
}
{
    who = $6
    TOTAL[who] += $8
    SUCESS[who] += $14
    FAILED[who] += $18
}
END {
    # for-in visits the users in an unspecified order.
    for (who in TOTAL) {
        printf "%-10s%-20s%-20s%-20s\n", who, TOTAL[who], SUCESS[who], FAILED[who]
        all_total = all_total + TOTAL[who]
        all_sucess = all_sucess + SUCESS[who]
        all_failed = all_failed + FAILED[who]
    }
    # Grand totals; the user column is left blank.
    printf "%-10s%-20s%-20s%-20s\n", "", all_total, all_sucess, all_failed
}
$ awk -f count.awk db.log.20231212
User Total Sucess Failed
tracy 6096344 2963340 3133004
allen 6293470 3182865 3110605
mike 5845083 2912982 2932101
jerry 5996178 3080723 2915455
lilei 6217104 3028971 3188133
han 5923975 3089899 2834076
36372154 18258780 18113374
2. 打印丢失记录的行数
一条记录行中, 总记录数 != 成功记录数 + 失败记录数
$ awk '{if($8 != $14 + $18) print NR}' db.log.20231212