保守的IR的长度统计

# At与Dt中保守的IR的长度比较
awk '{split($1,D,"_");split($2,A,"_");if(D[7]-A[7]>100 ||D[7]-A[7]<-100){}else{print $0}}' At_Dt_conserve_IR.txt
# A2 与D5中保守的IR长度比较
awk '{split($1,D,"_");split($2,A,"_");if(D[5]-A[5]>100 ||D[5]-A[5]<-100){}else{print $0}}' D5_A2_conserve_IR.txt
# D5与Dt中保守的IR长度
awk '{split($1,D,"_");split($2,A,"_");if(D[7]-A[5]>100 ||D[7]-A[5]<-100){}else{print $0}}' D5_Dt_conserve_IR.txt
# A2与At中保守的IR长度
awk '{split($2,D,"_");split($1,A,"_");if(D[7]-A[5]>100 ||D[7]-A[5]<-100){}else{print $0}}' A2_At_conserve_IR.txt

比较保守的IR长度与不保守的IR长度

只有At、Dt、A2、D5中存在的IR :3_1、3_3、4_3、4_4

## 将共有的序列长度信息保存
awk '{split($1,D,"_");split($2,A,"_");print "consensus\t"D[7]"\nconsensus\t"A[7]}' At_Dt_conserve_IR.txt >>IRlength.txt
awk '{split($1,D,"_");split($2,A,"_");print "consensus\t"D[5]"\nconsensus\t"A[5]}' D5_A2_conserve_IR.txt  >>IRlength.txt
awk '{split($1,D,"_");split($2,A,"_");print "consensus\t"D[7]"\nconsensus\t"A[5]}' D5_Dt_conserve_IR.txt >>IRlength.txt
awk '{split($2,D,"_");split($1,A,"_");print "consensus\t"D[7]"\nconsensus\t"A[5]}' A2_At_conserve_IR.txt  >>IRlength.txt
## 只在单个基因组出现的IR事件
awk '{split($1,A,"_");print "At\t"A[7]}' ../converse/3_1  >>IRlength.txt
awk '{split($1,A,"_");print "Dt\t"A[7]}' ../converse/3_3  >>IRlength.txt
awk '{split($1,A,"_");print "D5\t"A[5]}' ../converse/4_4  >>IRlength.txt
awk '{split($1,A,"_");print "A2\t"A[5]}' ../converse/4_3  >>IRlength.txt

统计保守IR占总的IR的比例

统计每个基因保守的IR的数目、以及总的IR的数目

cat A2_At_conserve_IR.txt  At_Dt_conserve_IR.txt |cut -f2|sort |uniq |cut -f1,2 -d "_"|uniq -c |awk '{print $2"\t"$1}'
## 统计总的IR的数目
cat A2_At_conserve_IR.txt  At_Dt_conserve_IR.txt |cut -f2|sort |uniq |cut -f1,2 -d "_"|uniq -c |awk '{print $2}'|xargs -I {} grep {} ../../../all_homole_ES_IR.txt |cut -f6|sed 's/|.*//g' |awk '{split($1,a,",");print length(a)}' >conserve/At_allIRcount.txt
## 统计所以的IR都不保守的基因
cut -f1,2 -d "_"  ../../converse/3_1 |uniq -c |awk '{print $2"\t"$1}' >1
cut -f1 1 |xargs -I {} grep {} ../../../../all_homole_ES_IR.txt |cut -f6|sed 's/|.*//g' |awk '{split($1,a,",");print length(a)}'|paste  1 - >2

A2中保守与非保守的统计

cat A2_At_conserve_IR.txt  D5_A2_conserve_IR.txt  |awk '$1~/^e/{print $1}$1~/^G/{print $2}'|sort |uniq |cut -f1 -d "_"|uniq -c |awk '{print $2"\t"$1}' >conserve/A2_conserve.txt
## 对应的IR总数
## 非保守的基因

D5中保守与非保守的统计

cat D5_*  |awk '$1~/Gor/{print $1}$1~/Ghir/{print $2}'|sort |uniq|cut -f1 -d "_"|sort |uniq -c |awk '{print $2"\t"$1}' >conserve/D5_conserve.txt
## 获取保守的总IR数目

Dt中保守与非保守的统计

cat D5_Dt_conserve_IR.txt At_Dt_conserve_IR.txt |cut -f1 |sort |uniq |cut -f1,2 -d "_"|uniq -c |awk '{print $2"\t"$1}'  >conserve/Dt_conserve.txt
## 保守的总gene数
cut -f1 Dt_conserve.txt |xargs  -I {} grep {} ../../../../all_homole_ES_IR.txt |cut -f2|sed 's/|.*//g' |awk '{split($1,a,",");print length(a)}' >1

画热图展示

## A2保守率
 cat A2_*|awk '{print $2/$3"\t"$1}'|sort  -k2,2 -k1,1 -r |uniq  -f1|awk '{print $2"\t"$1}'
## At保守率
cat At_conserve.txt At_noconserve.txt |awk '{print $2/$3"\t"$1}'|sort  -k2,2 -k1,1 -r |uniq  -f1|awk '{print $2"\t"$1}'
## Dt保守率
 cat Dt_*|awk '{print $2/$3"\t"$1}'|sort  -k2,2 -k1,1 -r |uniq  -f1|awk '{print $2"\t"$1}'
## D5保守率
cat D5_*|awk '{print $2/$3"\t"$1}'|sort  -k2,2 -k1,1 -r |uniq  -f1|awk '{print $2"\t"$1}'

Last updated