保守的IR的长度统计
# Length comparison of conserved IRs between genome pairs.
# Each filter splits the two ID columns on "_", subtracts the length fields
# and prints only the rows whose difference lies within [-100, 100].
# (awk variable names follow the genome each column appears to hold, judging
# by the ID patterns used elsewhere in these notes - confirm against the data.)

# At vs Dt: field 7 of column 1 minus field 7 of column 2.
awk '
  { split($1, dt, "_"); split($2, at, "_"); gap = dt[7] - at[7] }
  gap <= 100 && gap >= -100
' At_Dt_conserve_IR.txt

# A2 vs D5: field 5 of column 1 minus field 5 of column 2.
awk '
  { split($1, d5, "_"); split($2, a2, "_"); gap = d5[5] - a2[5] }
  gap <= 100 && gap >= -100
' D5_A2_conserve_IR.txt

# D5 vs Dt: field 7 of column 1 minus field 5 of column 2.
awk '
  { split($1, dt, "_"); split($2, d5, "_"); gap = dt[7] - d5[5] }
  gap <= 100 && gap >= -100
' D5_Dt_conserve_IR.txt

# A2 vs At: field 7 of column 2 minus field 5 of column 1
# (the columns are read in the opposite order for this file).
awk '
  { split($2, at, "_"); split($1, a2, "_"); gap = at[7] - a2[5] }
  gap <= 100 && gap >= -100
' A2_At_conserve_IR.txt
比较保守的IR长度与不保守的IR长度
仅在单个基因组(At、Dt、A2、D5)中存在的IR,分别对应文件 3_1、3_3、4_3、4_4
## Save the length fields of the shared (conserved) IR pairs.
## Every input row yields two "consensus<TAB>length" rows, one per ID column;
## all four files are appended to IRlength.txt through a single redirect.
{
  # Field 7 of column 1, then field 7 of column 2.
  awk '{
    split($1, first, "_"); split($2, second, "_")
    print "consensus\t" first[7]
    print "consensus\t" second[7]
  }' At_Dt_conserve_IR.txt

  # Field 5 of column 1, then field 5 of column 2.
  awk '{
    split($1, first, "_"); split($2, second, "_")
    print "consensus\t" first[5]
    print "consensus\t" second[5]
  }' D5_A2_conserve_IR.txt

  # Field 7 of column 1, then field 5 of column 2.
  awk '{
    split($1, first, "_"); split($2, second, "_")
    print "consensus\t" first[7]
    print "consensus\t" second[5]
  }' D5_Dt_conserve_IR.txt

  # Field 7 of column 2 is written first, then field 5 of column 1.
  awk '{
    split($2, second, "_"); split($1, first, "_")
    print "consensus\t" second[7]
    print "consensus\t" first[5]
  }' A2_At_conserve_IR.txt
} >>IRlength.txt
## IR events that occur in a single genome only.
## Table columns: genome label, index of the "_"-separated length field in
## column 1, input file. One "label<TAB>length" row is appended to
## IRlength.txt for every input row, in the table order below.
while read -r genome field_no src; do
  awk -v genome="$genome" -v field_no="$field_no" '{
    split($1, parts, "_")
    print genome "\t" parts[field_no]
  }' "$src"
done >>IRlength.txt <<'EOF'
At 7 ../converse/3_1
Dt 7 ../converse/3_3
D5 5 ../converse/4_4
A2 5 ../converse/4_3
EOF
统计保守IR占总的IR的比例
统计每个基因中保守IR的数目,以及该基因IR的总数目
# Number of conserved IRs per gene, taken from column 2 of both pair files.
# IDs are de-duplicated, truncated to their first two "_"-separated fields
# (used here as the gene key) and counted; output is "gene<TAB>count".
cut -f2 A2_At_conserve_IR.txt At_Dt_conserve_IR.txt \
  | sort \
  | uniq \
  | cut -d "_" -f1,2 \
  | uniq -c \
  | awk -v OFS='\t' '{print $2, $1}'

## Total IR count for the same genes, in the same gene order.
## Each gene key is grepped in the homolog table; from column 6 of every hit,
## everything from the first "|" on is dropped and the comma-separated entries
## are counted.
## NOTE(review): grep treats the key as an unanchored regex, so a key that is a
## substring of another ID would return extra lines - verify against the table.
cut -f2 A2_At_conserve_IR.txt At_Dt_conserve_IR.txt \
  | sort \
  | uniq \
  | cut -d "_" -f1,2 \
  | uniq -c \
  | awk '{print $2}' \
  | xargs -I {} grep {} ../../../all_homole_ES_IR.txt \
  | cut -f6 \
  | sed 's/|.*//g' \
  | awk '{print split($1, entries, ",")}' >conserve/At_allIRcount.txt
## Genes whose IRs are all non-conserved (input: ../../converse/3_1).
# Step 1: count IR events per gene key (first two "_"-separated fields of each
# line) and write "gene<TAB>count" to the scratch file "1".
# uniq -c only merges ADJACENT equal lines, so the keys are sorted first;
# without the sort, a gene whose lines are not contiguous in the input would
# be reported in several rows with partial counts.
cut -f1,2 -d "_" ../../converse/3_1 \
  | sort \
  | uniq -c \
  | awk -v OFS='\t' '{print $2, $1}' >1

# Step 2: look each gene up in the homolog table, take column 6 of the hit,
# drop everything from the first "|" on, count the comma-separated entries and
# paste that count next to the rows of "1" (result in scratch file "2").
# grep -F matches the key literally (a "." in an ID is not a wildcard) and
# "--" keeps a key from ever being parsed as an option.
# NOTE(review): paste pairs rows by position, so this assumes exactly one
# table line matches each gene - confirm against all_homole_ES_IR.txt.
cut -f1 1 \
  | xargs -I {} grep -F -- {} ../../../../all_homole_ES_IR.txt \
  | cut -f6 \
  | sed 's/|.*//g' \
  | awk '{print split($1, entries, ",")}' \
  | paste 1 - >2
A2中保守与非保守的统计
# Conserved IR count per A2 gene.
# The ID is taken from column 1 when column 1 starts with "e" and from
# column 2 when column 1 starts with "G". IDs are de-duplicated, truncated
# to the text before the first "_" and counted; "gene<TAB>count" is written
# to conserve/A2_conserve.txt.
awk '{
  if ($1 ~ /^e/) print $1
  if ($1 ~ /^G/) print $2
}' A2_At_conserve_IR.txt D5_A2_conserve_IR.txt \
  | sort \
  | uniq \
  | cut -d "_" -f1 \
  | uniq -c \
  | awk -v OFS='\t' '{print $2, $1}' >conserve/A2_conserve.txt
## Corresponding total IR count (no command recorded in these notes)
## Non-conserved genes (no command recorded in these notes)
D5中保守与非保守的统计
# Conserved IR count per D5 gene, over every file matching D5_*.
# The ID is taken from column 1 when column 1 contains "Gor" and from
# column 2 when column 1 contains "Ghir". IDs are de-duplicated, truncated
# to the text before the first "_", re-sorted and counted; "gene<TAB>count"
# is written to conserve/D5_conserve.txt.
cat D5_* \
  | awk '{
      if ($1 ~ /Gor/) print $1
      if ($1 ~ /Ghir/) print $2
    }' \
  | sort \
  | uniq \
  | cut -d "_" -f1 \
  | sort \
  | uniq -c \
  | awk -v OFS='\t' '{print $2, $1}' >conserve/D5_conserve.txt
## Total IR count of the conserved genes (no command recorded in these notes)
Dt中保守与非保守的统计
# Conserved IR count per Dt gene, from column 1 of both pair files.
# IDs are de-duplicated, truncated to their first two "_"-separated fields
# and counted; "gene<TAB>count" is written to conserve/Dt_conserve.txt.
cut -f1 D5_Dt_conserve_IR.txt At_Dt_conserve_IR.txt \
  | sort \
  | uniq \
  | cut -d "_" -f1,2 \
  | uniq -c \
  | awk -v OFS='\t' '{print $2, $1}' >conserve/Dt_conserve.txt

## Totals for the conserved genes.
## Each gene of Dt_conserve.txt is grepped in the homolog table; from column 2
## of every hit, everything from the first "|" on is dropped and the
## comma-separated entries are counted, one number per line, into file "1".
## NOTE(review): this reads Dt_conserve.txt from the current directory while
## the command above writes conserve/Dt_conserve.txt - presumably it is run
## from inside conserve/; confirm before running both steps together.
cut -f1 Dt_conserve.txt \
  | xargs -I {} grep {} ../../../../all_homole_ES_IR.txt \
  | cut -f2 \
  | sed 's/|.*//g' \
  | awk '{print split($1, entries, ",")}' >1
画热图展示
# Tables for the heat map: one "gene<TAB>ratio" row per gene.
#
# best_ratio_per_gene FILE...
#   Reads rows whose column 1 is the gene and computes column 2 / column 3
#   (presumably conserved count / total count - confirm against the inputs).
#   Rows are sorted in reverse order by gene and then by ratio; the sort is
#   textual, not numeric. uniq -f1 ignores the ratio field when comparing, so
#   only the first row of each gene is kept.
best_ratio_per_gene() {
  cat "$@" \
    | awk '{ratio = $2 / $3; print ratio "\t" $1}' \
    | sort -k2,2 -k1,1 -r \
    | uniq -f1 \
    | awk -v OFS='\t' '{print $2, $1}'
}

## A2 conservation rate
best_ratio_per_gene A2_*
## At conservation rate
best_ratio_per_gene At_conserve.txt At_noconserve.txt
## Dt conservation rate
best_ratio_per_gene Dt_*
## D5 conservation rate
best_ratio_per_gene D5_*
Last updated