1.
cat file:
>a11>a111>a1111>a22>a222>b13>b133
将各个title 下边的合在一起 输出如下:>a1111111>a2222>b1333
awk '/^>/&&!a[$1]++||!/^>/'
(可以改写成 awk '/>/{if(!a[$0]++){print $0};next}1' )
awk '{if($0~/>/){m=$0}else{a[m]=a[m]"\n"$0}}END{for(i in a)print i,a[i]}'
(条件 $0~/>/ 可以简写成 />/;if else语句也可以用next代替,改写成awk '/>/{m=$0;next}{a[m]=a[m]"\n"$0}END{for(i in a)print i,a[i]}')
awk '/>/{!a[$0]++;print}' f1 >a1>a1>a1>a2>a2>b1>b1
awk '/>/{print}' f1 >a1>a1>a1>a2>a2>b1>b1
awk '/>/{if(!a[$0]++)print $0}' f1 >a1>a2>b1
awk '/>/&&!a[$0]++' f1 >a1>a2>b1
2.
cat file:
aaaaaaadadaadadadad3a3a3a3a3a3s02a2a2a2a2
重复出现1至4次字符的行都输出,重复出现1至4次字符的结果应是ababaadadadad3a3a3a3s0
awk -v n=5 -F '' '{delete a;for(i=1;i<=NF;i++)a[$i]++;l=asort(a)}a[l]<n' file
awk -v n=5 -F '' '{delete a;for(i=1;i<=NF;i++)a[$i]++}{for(i in a)print i,a[i]}' f5a 6a 3d 2a 4d 4a 23 3a 23 2a 10 1s 13 1a 42 5
(i为每行重复出现的字符,a[i]为每行中某个字符重复出现的次数)
awk -v n=5 -F '' '{delete a;for(i=1;i<=NF;i++)a[$i]++;len=asort(a)}{for(i in a)print i,a[i]}' f51 61 22 31 42 41 22 31 22 24 11 12 13 11 42 5
(a[i]为每行中某个字符重复出现的次数。i为每行中某个字符的数组下标值(每行去重后,每行有哪几个字符),如第一行只有a重复,去重后下标值为1;第二行中a、d重复,下标值为1、2;a3s0行没有重复,所以有下标值1、2、3、4,至于排序为何为4、1、2、3,可参考 awk 'BEGIN{s="a3s0";split(s,a,"");len=asort(a);for(i in a)print i,a[i]}' 4 s1 02 33 a)
awk -v n=5 -F '' '{delete a;for(i=1;i<=NF;i++)a[$i]++;len=asort(a)}{print a[len]}' f56343215
(asort按升序排序后,a[len]为每行中出现次数最多的那个字符的出现次数,所以a[len]<n表示取所有字符出现次数都少于n的行)
3.
df -h
/dev/sda2 49G 21G 26G 45% /tmpfs 16G 0 16G 0% /dev/shm/dev/sda1 193M 34M 150M 19% /boot
AB=`df -h`
echo $AB 发现结果是以空格为分隔符,如何实现跟df -h一样的换行显示呢?
AB="$(df -h)"
echo "$AB"
4.
有一个文件file1:(tab分隔)
track name=STRINGENT_LT_1KBchr3 23232 asdbaskjdbchr3 32434 daffsfchr5 43131 dkfjkdjgkdjgkchr6 4574857 wejwechr7 11313 sjdjsgdchr8 2323 lskdksstrack name=STRINGENT_HIGH_SCOREchr1 1212123 fkekfhdchr2 2322342 erererchr2 43444 sdsdsdchr3 454545 dkfjdkjfkchr5 343434 qsadjhdjhtrack name=STRINGENT_LT_332KBtrack name=STRINGENT_LOW_SCOREchr7 2323232 sdsdsdaaachr10 223232 dssdsdschr19 677675 dlkslk
统计不同track name下的数目,结果如下:(tab分隔)
track name=STRINGENT_LT_1KB:6track name=STRINGENT_HIGH_SCORE:5track name=STRINGENT_LT_332KB:0track name=STRINGENT_LOW_SCORE:3
awk '{if(/^t/){if(NR>1)print s":"n;n=0;s=$0}else n++}END{print s":"n}' f8
(可以改写成next语句:awk '/^t/{if(NR>1)print s":"n;n=0;s=$0;next}{n++}END{print s":"n}' f8)
awk '{if(/track/){m=$0}else i++;print m,i}' f8
track name=STRINGENT_LT_1KB track name=STRINGENT_LT_1KB 1track name=STRINGENT_LT_1KB 2track name=STRINGENT_LT_1KB 3track name=STRINGENT_LT_1KB 4track name=STRINGENT_LT_1KB 5track name=STRINGENT_LT_1KB 6track name=STRINGENT_HIGH_SCORE 6track name=STRINGENT_HIGH_SCORE 7track name=STRINGENT_HIGH_SCORE 8……
awk '{if(/track/){m=$0;i=0}else i++;print m,i}' f8
track name=STRINGENT_LT_1KB 0track name=STRINGENT_LT_1KB 1track name=STRINGENT_LT_1KB 2track name=STRINGENT_LT_1KB 3track name=STRINGENT_LT_1KB 4track name=STRINGENT_LT_1KB 5track name=STRINGENT_LT_1KB 6track name=STRINGENT_HIGH_SCORE 0track name=STRINGENT_HIGH_SCORE 1track name=STRINGENT_HIGH_SCORE 2track name=STRINGENT_HIGH_SCORE 3track name=STRINGENT_HIGH_SCORE 4track name=STRINGENT_HIGH_SCORE 5……
awk '{if(/^t/){m=$0;i=0}else i++}END{print m,i}' f8
track name=STRINGENT_LOW_SCORE 3
awk '{if(/^t/){m=$0;a[m]=0}else a[m]++}END{print m,a[m]}' f8
track name=STRINGENT_LOW_SCORE 3
awk '{if(/^t/){m=$0;a[m]=0}else a[m]++}END{for(i in a){print i":"a[i]}}' f8
(可以改写成next语句:awk '/^t/{m=$0;a[m]=0;next}{a[m]++}END{for(i in a){print i":"a[i]}}' f8)
track name=STRINGENT_LT_1KB:6track name=STRINGENT_LT_332KB:0track name=STRINGENT_HIGH_SCORE:5track name=STRINGENT_LOW_SCORE:3
awk '{if(/^t/){if(NR>1)print s;s=$0}}' f8
track name=STRINGENT_LT_1KBtrack name=STRINGENT_HIGH_SCOREtrack name=STRINGENT_LT_332KB
awk '{if(/^t/){if(NR>1)print s;s=$0;print s}}' f8
track name=STRINGENT_LT_1KBtrack name=STRINGENT_LT_1KBtrack name=STRINGENT_HIGH_SCOREtrack name=STRINGENT_HIGH_SCOREtrack name=STRINGENT_LT_332KBtrack name=STRINGENT_LT_332KBtrack name=STRINGENT_LOW_SCORE
5.
echo "a b|c"|awk -F' |\\|' '{print NF}'
3echo "a b|c"|awk -F' |\|' '{print NF}'
awk: 警告: 转义序列“\|”被当作单纯的“|”2echo "a b|c"|awk -F'[ |]' '{print NF}'
3
awk -v RS='...'
这种写法可以少写一个\,但不算"规范"。可以少写一个的原因是,最
后落单的那个\,经过 awk 的字符串解释后还是\,与\\的解释结果是一样的。awk 'BEGIN{RS="...";}
写法就必须写够,一个也不能少。
$ echo -E '\\\'
\\\$ echo|awk -v v='\\\' '{print "["v"]"}'
[\\]$ echo|awk -v v='\\\\' '{print "["v"]"}'
[\\]$ echo|awk -v v='\' '{print "["v"]"}'
[\]$ echo|awk -v v='\\' '{print "["v"]"}'
[\]$ echo|awk 'BEGIN{v="\\";}{print "["v"]"}'
[\]$ echo|awk 'BEGIN{v="\\\";}{print "["v"]"}'
出错
cat file:
123
123{\123
123123{\1afewfwfaf
怎么才能用awk以{\为行分隔符来分割,得到每一段的内容?想要得到第一段:
123123
awk -vRS='{\\\\' 'NR==1'
6.
字符串 'aaabcccaaabbbccc',连续的字母作为一个子字符串,从左往右,去重复后,变为 'aaabcccbbb' , awk或sed实现,不使用管道。
awk -F '' '{while(i++<=NF){s=s$i;if($i!=$(i+1)){if(!a[s]++)printf s;s=""}}}'
awk -F'ccc' '{sub(/[a]+/,"",$2);print $1FS$2}'
7.
seq 9
123456789如何得到以下结果:1 2 32 3 43 4 54 5 65 6 76 7 87 8 98 9
seq 9|awk '{a[NR]=$0}END{for(i=1;i<NR;i++)print a[i],a[i+1],a[i+2]}'
seq 9 | awk '{b=($1+1<=9)?$1+1:" ";c=($1+2<=9)?$1+2:" "; print $1" "b" "c}'
seq 9|awk 'NR==1{a=$1};NR==2{b=$1};NR>2{print a,b,$1;a=b;b=$1}END{print a,b}'
8.
[root@localhost ~]# a=1
[root@localhost ~]# while read num ; do a=$num ; done < <(seq 5) ; echo $a 5[root@localhost ~]# for num in $(seq 5) ; do a=$num ; done ; echo $a5
seq 5 | while ... 这样的方式是产生了一个shell子进程,子进程里的东西自然不能改变父进程的值
9.
awk '/<frame name="mainFrame" src="/{print a[match($0,/<frame name="mainFrame" src="\/([^"]*)".*/,a)]}'
grep -Po '(?<=name="mainFrame" src=")[^"]+'
awk -F '[=" ]+' -v RS=">" '/"mainFrame" src="/{print $6}'
10.
实现这样
(a=1且b=1)或者(c=1且d=1)成立时echo pass,这样能用if实现么?如何来嵌套?
a=1; b=1; c=0; d=0;if [ $a -eq 1 ] && [ $b -eq 1 ];then echo pass;elif [ $c -eq 1 ] && [ $d -eq 1 ];then echo pass;fi
a=1; b=1; c=0; d=0; if (((a==1 && b==1 )||(c==1 && d ==1)));then echo pass;fi
awk 'BEGIN{a=1;b=1;c=0;d=1;if(((a==1&&b==1)||(c==1&&d==1))){print "pass"}else{print "。。"}}'
awk 'BEGIN{a=1; b=1; c=0; d=0;if(a==1&&b==1){print "pass"}else if(c==1&&d==1){print "pass"}else{print "。。"}}'
if [[ ($a = 1 && $b = 1) || ($c = 1 && $d = 1) ]];then
echo True; else echo False; fi
11.
awk 'm~/==/&&/^\s*$/{print m}{m=$0}'
12.
grep -vf B A
awk 'NR==FNR{a[$0];next}{for(i in a){if(match($0,i)>0)next}print}' B A
awk 'NR==FNR{a[$0];next}{for(i in a)if(index($0,i))next}1' B A
awk 'FNR==NR{k=k?k"|"$0:$0;next}$0!~k{print}' B A
(awk '{k=k?k"|"$0:$0}END{print k}' BNow|We|ment)
13.
二维数组
awk 'BEGIN{for(i=1;i<=3;i++)for(j=1;j<=4;j++)a[i,j]=i*j;for(x in a){split(x,b,SUBSEP);print b[1]"*"b[2]"="a[x]}}'
找出文件A有文件B没有的行
awk 'BEGIN{for(i=1;i<=3;i++){for(j=1;j<=4;j++){s=s"R"i"C"j;print s}}}'
或者awk 'BEGIN{for(i=1;i<=3;i++)for(j=1;j<=4;j++)print "R"i"C"j}'
awk 'BEGIN{for(i=1;i<=3;i++)for(j=1;j<=4;j++)a["R"i"C"j];for(i in a)print i}'
awk '{for(i=1;i<=3;i++)for(j=1;j<=4;j++)a["R"i"C"j]}END{for(i in a)print i}' file
awk 'BEGIN{for(i=1;i<=3;i++){for(j=1;j<=4;j++){s="R"i"C"j;print s>"file"}}}'
awk 'NR==FNR{a[$2];next}{for(i in a){if($0==i)next}print}'
14.
awk '{ORS=NR%2?"\n":" "}1'
awk 'NR==1{print;next}NR%2==0{lastline=$0;next}{print lastline,$0}'
awk 'NR%2==0{m=$0}NR%2==1{print m,$0}'
awk 'NR==1{print}NR%2==0{x=NR;m=$0}x{getline s;print m,s}'
awk 'NR==1{print}NR%2==0{x=NR;m=$0}x&&NR-x==1{print m,$0}'
awk 'NR==1{print}{a[NR]=$0}END{for(i=1;i<=NR;i++){if(i%2==0)print a[i],a[i+1]}}'
15.
\1代表你前面第一个\( \)里面的内容,\2代表第二个,以此类推
awk --re-interval 'NR==1{a=gensub(/.*(.{10})/,"\\1",1);print a+0}'
awk -F '' '{m=substr($0,NF-9,NF);gsub(/0+/,"",m);print m}'
16.
awk '{a[$1]=$2;b[$1]=$1}END{for(i=1;i<=asort(b,c);i++)print b[c[i]],a[b[c[i]]],i%3==1?++n:n}'
cat file:
1 a
3 s2 f5 g7 j4 t9 r6 w8 l
根据第一列排序第二列
awk '{a[$1]=$2}END{for(i=1;i<=NR;i++)print i,a[i]}'
awk '{a[$1]=$2}END{for(i=1;i<=asort(a,c);i++)print i,a[i]}'
$ cat file
aaa 125ddd 123bbb 128ccc 120$ awk '{a[$2]=$0}END{for(i=1;i<=asort(a);i++)print a[i]}' file
aaa 125bbb 128ccc 120ddd 123$ awk '{a[$2]=$0}END{for(i=1;i<=asorti(a,b);i++)print a[b[i]]}' file
ccc 120ddd 123aaa 125bbb 128
echo "8 11111 9" | awk '{split($0,a," ");for(i=1;i<=asort(a,c);i++)print i,a[i],c[i]}'
1 8 82 11111 93 9 11111
echo "8 11111 9" | awk '{split($0,a," ");for(i=1;i<=asort(a);i++)print i,a[i],c[i]}'
1 8 2 9 3 11111
echo "8 11111 9" | awk '{split($0,a," ");for(i=1;i<=asort(a,c);i++)print i,a[2],c[2]}'
1 11111 92 11111 93 11111 9
17.
var="'(]\\{}\$\""
18.
#! /bin/sh
A=B echo $A echo $A
19.
awk '/ve/||(sub($2,$2 FS $NF) && NF--)' OFS='\t'
awk '/ap/{for(i=1;i<=NF;i++){if(i==3)printf $NF"\t"$i"\t";else if($i==$NF)print "";else{printf $i"\t"}}next}1'
20.
awk 'NR==FNR{a[$1" "$2" "$3]=$1;b[$1]++}NR>FNR{counter=0;for(i in a)if($1==a[i]){split(i,m," ");i || $2>m[3] || (m[2]==$2 && m[3]==$3))counter++}print NR,counter}' f1 f1
awk 'NR==FNR{a[$1" "$2" "$3]=$1;b[$1]++}NR>FNR{counter=0;for(i in a){if($1==a[i]){split(i,m," ");] || $2>m[3] || ( m[2]==$2 && m[3]==$3))counter++;else next;if(counter == b[$1])print $0}}}' f1 f1
21.
awk '{if($1==x){s=s","$2}else{if(NR>1){print s}s=$1"\t"$2}x=$1}END{print s}'
awk '{x[$1]=length(x[$1])?x[$1]","$2:$2}END{for(i in x)print i"\t"x[i]}'
awk '{x[$1]=length(x[$1])?x[$1]","$2:$2;len=asorti(x,b)}END{for(i=1;i<=len;i++)print b[i],x[b[i]]}' f5
awk '/track/{x=NR;a[x]=$0}x{print x,NR,a[x],NR-x}'
7720 98
7720 99
7720 218
7720 219
7720 220
awk '{a[$1]=$2}{printf a[$1]","}'
awk -v RS='7720' '{printf ","$1}'
awk '{x=$1;if($1==x){s=s","$2}}END{print x,s}'
awk '{if($1==x){s=s","$2}else{if(NR>1){print s}s=$1"\t"$2}x=$1}END{print s}'
awk '{if($1==x){if($2==y+1)s=s"-"$2;else s=s","$2}else{if(NR>1){print s}s=$1"\t"$2}x=$1;y=$2}END{gsub(/-[^,]*-/,"-",s);print s}'
22.
练习题(排序合并)
awk '{m=gensub(/([^0-9]+).*/,"\\1",1,$2);n=gensub(/[^0-9]+(.*)/,"\\1",1,$2);t=$1" "sprintf("%s%10s",m,n);a[t]=$1;b[t]=m;c[t]=n}END{for(i=0;i++<asorti(c,d);){print d[i],a[d[i]]}}'
apple cat 12 apple
apple cat 106 appleapple cat 107 appleapple cat 108 appleapple cat 109 appleapple cat 123 appleapple cat 125 applepear dog 11 pearpear dog 101 pearpear dog 103 pearpear dog 104 pearpear dog 105 pearpear mouse 106 pearpear mouse 107 pearpear mouse 108 pearpear mouse 109 pearpear mouse 123 pearpear mouse 125 pear
awk '{m=gensub(/([^0-9]+).*/,"\\1",1,$2);n=gensub(/[^0-9]+(.*)/,"\\1",1,$2);t=$1" "sprintf("%s%10s",m,n);a[t]=$1;b[t]=m;c[t]=n}END{for(i=0;i++<asorti(c,d);){if(!k)printf "%s: ",a[d[i]]}}'
awk '{m=gensub(/([^0-9]+).*/,"\\1",1,$2);n=gensub(/[^0-9]+(.*)/,"\\1",1,$2);t=$1" "sprintf("%s%10s",m,n);a[t]=$1;b[t]=m;c[t]=n}END{for(i=0;i++<asorti(c,d);){if(!k)printf "%s: ",a[d[i]];if(k&&k!=a[d[i]]){printf "%s\n%s: ",s,a[d[i]];s=""}}}'
23.
awk '{a[$1];for(i=1;i++<NF;)b[$1,i]=b[$1,i]!=""?b[$1,i]"/"$i:$i}END{for(i in a){printf i;for(j=1;j++<NF;)printf " "b[i,j];print ""}}'
awk '{if(a[$1]){split(a[$1],b," ");a[$1]=$1" "b[2]"/"$2" "b[3]"/"$3" "b[4]"/"$4}else{a[$1]=$0}}END{l=asort(a);for(i=1;i<=l;i++)print a[i]}' file
24.
awk -F '' '{if($3~4)print $0>"a";else print$0>"b"}' f5
awk -F '' '$3~4{print $0>"e"}$3~5{print$0>"f"}' f5awk '/^..4/{print $0>"g"}/^..5/{print$0>"h"}' f5awk '{m=substr($0,3,1);if(m==4)print $0>"c";else{print $0>"d"}}' f5awk '{print >substr($0,3,1)".txt"}'