20200108保守的TAD
筛选保守的TAD水平
根据boundary提取对应的共线性区域
# Window filter around a boundary position: keep records whose column 4 is
# >= location-100000 and whose column 5 is <= location+100000.
# Set `location` to the boundary coordinate before running. No input file is
# named, so records are read from standard input.
location=
# The shell value is handed to awk with -v instead of being spliced into the
# program text, so an unusual value cannot alter or break the awk program.
awk -v loc="$location" '$5 <= loc + 100000 && $4 >= loc - 100000 { print $0 }'
将一对多的boundary都归于不保守的类型
# Collapse one-to-many boundaries into the non-conserved class: a row with a
# non-empty third tab-separated field is reduced to its first field plus
# "None"; every other row is passed through unchanged.
awk 'BEGIN { FS = "\t" }
{
  if ($3 != "") {
    print $1 "\tNone"
  } else {
    print $0
  }
}' A2_K12_100000_conserve.txt
统计保守的TAD
例如：某个TAD两端的boundary与另一个TAD两端的boundary分别保守
## 排序前先剔除不保守的boundary（第二列为None的行）
# Pair up conserved boundaries into conserved TADs.
#   Stage 1: rows with a non-empty third tab field become "<field1>\tNone".
#   Stage 2: drop rows whose second field is "None", then prepend the 6th
#            "_"-separated token of fields 1 and 2 as two new leading columns.
#   Stage 3: numeric sort on the first new column.
#   Stage 4: compare every row with the row before it and print matched pairs
#            (a third column "inversion" marks a step of -1 in column 2).
# Stage 4 only ever looks at the previous row, so it streams with four scalars
# rather than buffering the whole input in a gawk-only array of arrays
# (a[NR][1]); this form also runs under mawk and other POSIX awks.
# NOTE(review): rows tied on column 1 are ordered by sort's last-resort
# whole-line comparison, not numerically by column 2 -- confirm that is intended.
awk -F "\t" '$3!=""{print $1"\tNone"}$3==""{print $0}' A2_K12_100000_conserve.txt \
  | awk '$2!="None"{split($1,a,"_");split($2,b,"_");print a[6],b[6],$0}' OFS="\t" \
  | sort -k1,1n \
  | awk '
NR > 1 {
  # Relations between the current row ($1..$4) and the previous row (p1..p4).
  same_key = ($1 == p1)                            # same column-1 index
  next_key = (($1 - p1 == 1) && ($3 !~ /left/))    # next index, ID lacks "left"
  same_hit = ($2 == p2)                            # same column-2 index
  hit_step = $2 - p2                               # change in column-2 index

  if (same_key && same_hit)       print p3 "\t" $4
  if (same_key && hit_step == 1)  print p3 "\t" $4
  if (same_key && hit_step == -1) print p3 "\t" p4 "\tinversion"
  if (next_key && same_hit)       print $3 "\t" $4
  if (next_key && hit_step == 1)  print $3 "\t" $4
  if (next_key && hit_step == -1) print $3 "\t" p4 "\tinversion"
}
# Remember this row for the comparison with the next one.
{ p1 = $1; p2 = $2; p3 = $3; p4 = $4 }
'
TODO：一对多的情况尚未处理完，需要再检查
排好序的文件
# Emit the sorted table only (no pairing step, and "None" rows are kept):
# rows with a non-empty third tab field are reduced to "<field1>\tNone", the
# 6th "_"-separated token of fields 1 and 2 is prepended as two tab-separated
# columns, and the result is sorted numerically on the first column.
awk 'BEGIN { FS = "\t" }
{
  if ($3 != "") {
    print $1 "\tNone"
  } else {
    print $0
  }
}' A2_K12_100000_conserve.txt |
  awk -v OFS="\t" '{
    split($1, id1_parts, "_")
    split($2, id2_parts, "_")
    print id1_parts[6], id2_parts[6], $0
  }' |
  sort -k1,1n
Last updated