## At中不存在
cat converse/2_2 |xargs -I {} grep -A1 {} Dt_intron_junction.fasta >2_2.fasta
## blast比对
blastn -query 2_2.fasta -db ../blastDB/At -outfmt "6 qseqid sseqid qstart qend sstart send nident pident qcovs evalue bitscore" -evalue 1e-5 -out 2_2.query_blast.txt
## 比对之后有些基因不见了估计是不保守的原因吧
awk '$7>150&&$8>80&&$9>80{print $0}' 2_2.query_blast.txt
## 根据相似度和覆盖度在80%以上同时考虑正负链的情况
awk '$7>150&&$8>80&&$9>80{print $0}' 2_2.query_blast.txt|awk '{a[$1":"$2]+=1}END{for(i in a){if(a[i]>=2){split(i,b,":");print b[2];}}}'|xargs -I {} grep {} 2_2.query_blast.txt |awk '$7>150&&$8>80&&$9>80{print $0}'|awk '$3+100<300{if($6>a[$1":"$2][0]&&$6>$5){a[$1":"$2][0]=$6}else if($6<$5&&$5>a[$1":"$2][1]){a[$1":"$2][1]=$6}}$3+100>300{if($5>a[$1":"$2][1]&&$6>$5){a[$1":"$2][1]=$5}else if($6<$5&&$5>a[$1":"$2][0]){a[$1":"$2][0]=$5}}END{for(i in a){print i"\t"a[i][0]"\t"a[i][1]"\t"a[i][1]-a[i][0]+1}}'|awk '$4>0{print $0}' >2_no
有了,没有发生IR事件的坐标之后可以看一下对应的甲基化程度怎么样
## 非保守的At处理方式
awk -F ":" '{print $2}' 2_no|awk '{split($1,a,"_");print a[3]"_"a[4]"\t"a[5]+$2-1"\t"a[5]+$3-1"\t"a[7]"\t"a[1]"_"a[2]"_"a[3]"_"a[4]"_"a[5]+$2-1"_"a[5]+$3-1"_"a[7]}' >At_noconverse.bed
## 提取各个基因组甲基化位点
grep "Ghir_D" ~/work/Alternative/result/homologo/IntronR/intronR/test/converseBed/2_2.bed >Dt_converse.bed
grep "Gor" ~/work/Alternative/result/homologo/IntronR/intronR/test/converseBed/2_2.bed >D5_converse.bed
grep "evm" ~/work/Alternative/result/homologo/IntronR/intronR/test/converseBed/2_2.bed >A2_converse.bed