137 lines
5.0 KiB
Perl
137 lines
5.0 KiB
Perl
|
|
#!/usr/bin/perl
|
||
|
|
use strict;
|
||
|
|
use warnings;
|
||
|
|
|
||
|
|
die "usage:perl $0 depth_file outputDir name" unless @ARGV==3;
|
||
|
|
open SD,"/dataseq/jmdna/codes/pancancer_controlsample/gene_strand.txt";
|
||
|
|
my %strand;
|
||
|
|
while(<SD>){
|
||
|
|
chomp;
|
||
|
|
my @line=split;
|
||
|
|
$strand{$line[1]}=$line[0];
|
||
|
|
}
|
||
|
|
|
||
|
|
my($depth,$outputDir,$name)=@ARGV;
|
||
|
|
open IN,"$outputDir/fusion/${name}.fusion.hg19_multianno.vcf";
|
||
|
|
open OUT1,">$outputDir/fusion/${name}.fusion.reanno.vcf";
|
||
|
|
open OUT2, ">$outputDir/fusion/${name}.longindel.vcf";
|
||
|
|
while(<IN>){
|
||
|
|
if(/^##/){
|
||
|
|
print OUT1;
|
||
|
|
print OUT2;
|
||
|
|
next;
|
||
|
|
}
|
||
|
|
if(/^#CHROM/){
|
||
|
|
chomp;
|
||
|
|
print OUT1 "$_\tCHROM2\tPOS2\tINFO2\tGENE1\tGENE2\tFUSION\tFREQ1\tFREQ2\n";
|
||
|
|
print OUT2 "$_\n";
|
||
|
|
next;
|
||
|
|
}
|
||
|
|
chomp;
|
||
|
|
my @line=split(/\t/);
|
||
|
|
$line[7]=~/SVTYPE=(.*?);/;
|
||
|
|
my $SVTYPE=$1;
|
||
|
|
if($SVTYPE eq 'DEL' or $SVTYPE eq 'DUP' or $SVTYPE eq 'INS'){
|
||
|
|
print OUT2 "$_\n";
|
||
|
|
next;
|
||
|
|
}elsif($SVTYPE eq 'INV'){
|
||
|
|
$line[7]=~/SVLEN=(.*?);/;
|
||
|
|
if($1>1000){
|
||
|
|
my $pt_inv=$_;
|
||
|
|
my $gene1=&site2gene($line[0],$line[1]);
|
||
|
|
$line[7]=~/END=(.*?);/;
|
||
|
|
my $end=$1;
|
||
|
|
my $gene2=&site2gene($line[0],$end);
|
||
|
|
my $freq_inv=sprintf("%.2f",(split(/:/,$line[9]))[1]/(split(/:/,$line[9]))[7]*100);
|
||
|
|
my $depth=&depth($line[0],$line[1],$line[0],$end);
|
||
|
|
my $freq_inv2;
|
||
|
|
if($depth){$freq_inv2=sprintf("%.2f",(split(/:/,$line[9]))[1]/$depth*100)}else{$freq_inv2="-"};
|
||
|
|
print OUT1"$pt_inv\t$line[0]\t$end\t-\t$gene1\t$gene2\t$gene1-$gene2\t$freq_inv\t$freq_inv2\n";
|
||
|
|
print OUT1"$pt_inv\t$line[0]\t$end\t-\t$gene2\t$gene1\t$gene2-$gene1\t$freq_inv\t$freq_inv2\n";
|
||
|
|
next;
|
||
|
|
}
|
||
|
|
}elsif($SVTYPE eq 'BND'){
|
||
|
|
if($line[2]=~/(\d+)_1/){
|
||
|
|
$line[7]=~/EVENT=(.*?);\S+Func.refGene=(.*?);Gene.refGene=(.*?);/;
|
||
|
|
my ($event,$region1,$gene1)=($1,$2,$3);
|
||
|
|
my $pt="$_";
|
||
|
|
my $mate=<IN>;
|
||
|
|
chomp $mate;
|
||
|
|
my @line2=split(/\t/,$mate);
|
||
|
|
$line2[7]=~/EVENT=(.*?);\S+Func.refGene=(.*?);Gene.refGene=(.*?);/;
|
||
|
|
my ($event2,$region2,$gene2)=($1,$2,$3);
|
||
|
|
if($event eq $event2 and $line2[2]=~/_2/ and $gene1 ne $gene2 and ($region1 !~ /upstream|downstream|intergenic/ or $region2 !~ /upstream|downstream|intergenic/)){
|
||
|
|
my $strand1=&gene2strand($gene1);
|
||
|
|
my $strand2=&gene2strand($gene2);
|
||
|
|
my $freq=sprintf("%.2f",(split(/:/,$line[9]))[1]/(split(/:/,$line[9]))[7]*100);
|
||
|
|
my $depth=&depth($line[0],$line[1],$line2[0],$line2[1]);
|
||
|
|
my $freq_bnd;
|
||
|
|
if ($depth){$freq_bnd=sprintf("%.2f",(split(/:/,$line[9]))[1]/$depth*100)}else{$freq_bnd="-"};
|
||
|
|
if($line[4]=~/^N\[/){
|
||
|
|
if($strand1 eq '+' and $strand2 eq '+'){
|
||
|
|
print OUT1 "$pt\t$line2[0]\t$line2[1]\t$line2[7]\t$gene1\t$gene2\t$gene1-$gene2\t$freq\t$freq_bnd\n";
|
||
|
|
}elsif($strand1 eq '-' and $strand2 eq '-'){
|
||
|
|
print OUT1 "$pt\t$line2[0]\t$line2[1]\t$line2[7]\t$gene2\t$gene1\t$gene2-$gene1\t$freq\t$freq_bnd\n";
|
||
|
|
}
|
||
|
|
}elsif($line[4]=~/^N\]/){
|
||
|
|
if($strand1 eq '+' and $strand2 eq '-'){
|
||
|
|
print OUT1 "$pt\t$line2[0]\t$line2[1]\t$line2[7]\t$gene1\t$gene2\t$gene1-$gene2\t$freq\t$freq_bnd\n";
|
||
|
|
}elsif($strand1 eq '-' and $strand2 eq '+'){
|
||
|
|
print OUT1 "$pt\t$line2[0]\t$line2[1]\t$line2[7]\t$gene2\t$gene1\t$gene2-$gene1\t$freq\t$freq_bnd\n";
|
||
|
|
}
|
||
|
|
}elsif($line[4]=~/\]N$/){
|
||
|
|
if($strand1 eq '+' and $strand2 eq '+'){
|
||
|
|
print OUT1 "$pt\t$line2[0]\t$line2[1]\t$line2[7]\t$gene2\t$gene1\t$gene2-$gene1\t$freq\t$freq_bnd\n";
|
||
|
|
}elsif($strand1 eq '-' and $strand2 eq '-'){
|
||
|
|
print OUT1 "$pt\t$line2[0]\t$line2[1]\t$line2[7]\t$gene1\t$gene2\t$gene1-$gene2\t$freq\t$freq_bnd\n";
|
||
|
|
}
|
||
|
|
}elsif($line[4]=~/\[N$/){
|
||
|
|
if($strand1 eq '+' and $strand2 eq '-'){
|
||
|
|
print OUT1 "$pt\t$line2[0]\t$line2[1]\t$line2[7]\t$gene2\t$gene1\t$gene2-$gene1\t$freq\t$freq_bnd\n";
|
||
|
|
}elsif($strand1 eq '-' and $strand2 eq '+'){
|
||
|
|
print OUT1 "$pt\t$line2[0]\t$line2[1]\t$line2[7]\t$gene1\t$gene2\t$gene1-$gene2\t$freq\t$freq_bnd\n";
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
sub site2gene{
|
||
|
|
my ($chr,$pos)=@_;
|
||
|
|
my %gene;
|
||
|
|
open REFGENE,"/dataseq/jmdna/software/annovar/humandb/hg19_refGene.txt";
|
||
|
|
my @gene;
|
||
|
|
while(<REFGENE>){
|
||
|
|
my @line=split(/\t/,$_);
|
||
|
|
if ($chr eq $line[2] and $pos>=$line[4] and $pos<=$line[5]){
|
||
|
|
$gene{$line[-4]}++;
|
||
|
|
push @gene,$line[-4] if $gene{$line[-4]}<2;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
return join("|",@gene);
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
sub depth{
|
||
|
|
my($chr1,$pos1,$chr2,$pos2)=@_;
|
||
|
|
my $depth1=`less $depth|grep -w $chr1|grep -w $pos1`;
|
||
|
|
my $depth2=`less $depth|grep -w $chr2|grep -w $pos2`;
|
||
|
|
$depth1=(split(/\t/,$depth1))[3] if $depth1;
|
||
|
|
$depth2=(split(/\t/,$depth2))[3] if $depth2;
|
||
|
|
my $depth;
|
||
|
|
if($depth1 and $depth2){
|
||
|
|
$depth=($depth1+$depth2)/2
|
||
|
|
}elsif($depth1 and !$depth2){
|
||
|
|
$depth=$depth1;
|
||
|
|
}elsif(!$depth1 and $depth2){
|
||
|
|
$depth=$depth2;
|
||
|
|
}
|
||
|
|
return $depth;
|
||
|
|
}
|
||
|
|
|
||
|
|
sub gene2strand{
|
||
|
|
my $gene=shift @_;
|
||
|
|
return $strand{$gene};
|
||
|
|
}
|