20200108保守的TAD

筛选保守的TAD水平

根据boundary提取对应的共线性区域

# Print the input rows that satisfy
#   column 5 <= location + window   and   column 4 >= location - window
# with a window of 100000. No input file is named, so awk reads standard input.
# NOTE(review): columns 4 and 5 are presumably the start/end coordinates of a
# syntenic region; confirm against the actual input table.
# The coordinate and window are handed to awk with -v instead of being spliced
# into the program text, so the value can never be parsed as awk code.
location=   # boundary coordinate; fill in before running
awk -v loc="$location" -v win=100000 '$5 <= loc + win && $4 >= loc - win { print $0 }'

将一对多的boundary都归于不保守的类型

# Rows with a non-empty third tab-separated field are rewritten as
# "<column 1><TAB>None"; every other row is passed through unchanged.
awk 'BEGIN { FS = "\t" } { if ($3 != "") print $1 "\tNone"; else print $0 }' A2_K12_100000_conserve.txt

统计保守的TAD

例如:某个TAD两端的boundary,分别与另一个TAD两端的boundary保守(一一对应)

## 把不保守的boundary排除在外,再按编号排序
# Pair consecutive conserved boundaries into conserved TAD records.
#
# Stage 1: rows with a non-empty third tab-separated field are rewritten as
#          "<column 1><TAB>None"; other rows pass through unchanged.
# Stage 2: rows whose second field is "None" are dropped; each remaining row is
#          prefixed with the 6th "_"-separated token of column 1 and of column 2.
# Stage 3: numeric sort on the first prefixed key.
# Stage 4: every row is compared with the row directly before it. After the
#          prefixing, the fields are: $1 = key from column 1, $2 = key from
#          column 2, $3 = original column 1, $4 = original column 2.
#
# Stage 4 keeps only the previous row in plain scalar variables. This avoids
# the gawk-only arrays-of-arrays syntax (a[i][j]), so the pipeline also runs
# under mawk / BusyBox awk / BWK awk, and it no longer buffers the whole input.
# NOTE(review): the /left/ test presumably tells left from right boundary IDs;
# confirm against the ID naming scheme.
awk -F "\t" '$3!=""{print $1"\tNone"}$3==""{print $0}' A2_K12_100000_conserve.txt \
  | awk '$2!="None"{split($1,a,"_");split($2,b,"_");print a[6],b[6],$0}' OFS="\t" \
  | sort -k1,1n \
  | awk '
NR > 1 {
    # Same first key as the previous row: the record starts at the
    # previous row ID.
    if ($1 == prev_k1) {
        if ($2 == prev_k2)       print prev_id1 "\t" $4
        if ($2 - prev_k2 == 1)   print prev_id1 "\t" $4
        # Second key went down by one: report the previous partner and
        # tag the record as an inversion.
        if ($2 - prev_k2 == -1)  print prev_id1 "\t" prev_id2 "\tinversion"
    }

    # First key advanced by exactly one and the current ID does not match
    # /left/: the record starts at the current row ID.
    if ($1 - prev_k1 == 1 && $3 !~ /left/) {
        if ($2 == prev_k2)       print $3 "\t" $4
        if ($2 - prev_k2 == 1)   print $3 "\t" $4
        if ($2 - prev_k2 == -1)  print $3 "\t" prev_id2 "\tinversion"
    }
}
# Remember the current row for the comparison on the next record.
{ prev_k1 = $1; prev_k2 = $2; prev_id1 = $3; prev_id2 = $4 }'

有一对多的情况明天再看看

排好序的文件

# Sorted boundary table, with no filtering of "None" rows.
# Rows with a non-empty third tab-separated field become "<column 1><TAB>None";
# every row is then prefixed with the 6th "_"-separated token of its first and
# second fields and the result is sorted numerically on the first prefix.
awk 'BEGIN { FS = "\t" } { if ($3 != "") print $1 "\tNone"; else print $0 }' A2_K12_100000_conserve.txt \
  | awk -v OFS="\t" '{ split($1, left_id, "_"); split($2, right_id, "_"); print left_id[6], right_id[6], $0 }' \
  | sort -k1,1n

Last updated