163 lines
6.1 KiB
Perl
163 lines
6.1 KiB
Perl
|
|
#!/usr/bin/perl
|
||
|
|
use strict;
|
||
|
|
use warnings;
|
||
|
|
|
||
|
|
die "usage:perl $0 depth_file input out" unless @ARGV == 3;
|
||
|
|
my ($depth, $input, $out) = @ARGV;
|
||
|
|
|
||
|
|
my $public_path = defined $ENV{'PUBLIC'} ? $ENV{'PUBLIC'} : "/dataseq/jmdna/codes/public/";
|
||
|
|
|
||
|
|
# open IN,"$outputDir/fusion/${name}.fusion.hg19_multianno.vcf";
|
||
|
|
# open OUT1,">$outputDir/fusion/${name}.fusion.reanno.vcf";
|
||
|
|
|
||
|
|
open IN, "$input";
|
||
|
|
open OUT1, "> $out";
|
||
|
|
|
||
|
|
open SD, "$public_path/gene_strand.txt";
|
||
|
|
my %strand;
|
||
|
|
while (<SD>) {
|
||
|
|
chomp;
|
||
|
|
my @line = split;
|
||
|
|
$strand{$line[1]} = $line[0];
|
||
|
|
}
|
||
|
|
|
||
|
|
while (<IN>) {
|
||
|
|
if (/^##/) {
|
||
|
|
print OUT1;
|
||
|
|
next;
|
||
|
|
}
|
||
|
|
if (/^#CHROM/) {
|
||
|
|
chomp;
|
||
|
|
print OUT1 "$_\tCHROM2\tPOS2\tINFO2\tGENE1\tGENE2\tFUSION\tFREQ1\tFREQ2\n";
|
||
|
|
next;
|
||
|
|
}
|
||
|
|
chomp;
|
||
|
|
my @line = split(/\t/);
|
||
|
|
$line[7] =~ /SVTYPE=(.*?);/;
|
||
|
|
my $SVTYPE = $1;
|
||
|
|
if ($SVTYPE eq 'INV') {
|
||
|
|
$line[7] =~ /SVLEN=(.*?);/;
|
||
|
|
if ($1 > 1000) {
|
||
|
|
my $freq_inv = sprintf("%.2f", (split(/:/, $line[9]))[1] / (split(/:/, $line[9]))[7] * 100);
|
||
|
|
next if $freq_inv < 0.3;
|
||
|
|
my $pt_inv = $_;
|
||
|
|
my $gene1 = &site2gene($line[0], $line[1]);
|
||
|
|
$line[7] =~ /END=(.*?);/;
|
||
|
|
my $end = $1;
|
||
|
|
my $gene2 = &site2gene($line[0], $end);
|
||
|
|
my $depth = &depth($line[0], $line[1], $line[0], $end);
|
||
|
|
my $freq_inv2;
|
||
|
|
if ($depth) {$freq_inv2 = sprintf("%.2f", (split(/:/, $line[9]))[1] / $depth * 100)}
|
||
|
|
else {$freq_inv2 = "-"};
|
||
|
|
print OUT1 "$pt_inv\t$line[0]\t$end\t-\t$gene1\t$gene2\t$gene1-$gene2\t$freq_inv\t$freq_inv2\n";
|
||
|
|
print OUT1 "$pt_inv\t$line[0]\t$end\t-\t$gene2\t$gene1\t$gene2-$gene1\t$freq_inv\t$freq_inv2\n";
|
||
|
|
next;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
elsif ($SVTYPE eq 'BND') {
|
||
|
|
if ($line[2] =~ /(\d+)_1/) {
|
||
|
|
$line[7] =~ /EVENT=(.*?);\S+Func.refGene=(.*?);Gene.refGene=(.*?);/;
|
||
|
|
my ($event, $region1, $gene1) = ($1, $2, $3);
|
||
|
|
my $pt = "$_";
|
||
|
|
my $mate = <IN>;
|
||
|
|
chomp $mate;
|
||
|
|
my @line2 = split(/\t/, $mate);
|
||
|
|
$line2[7] =~ /EVENT=(.*?);\S+Func.refGene=(.*?);Gene.refGene=(.*?);/;
|
||
|
|
my ($event2, $region2, $gene2) = ($1, $2, $3);
|
||
|
|
$gene1 = (split /\\x3b/, $gene1)[0];
|
||
|
|
$gene2 = (split /\\x3b/, $gene2)[0];
|
||
|
|
if ($event eq $event2 and $line2[2] =~ /_2/ and $gene1 ne $gene2 and $region1 !~ /upstream|downstream|intergenic|UTR/ and $region2 !~ /upstream|downstream|intergenic|UTR/) {
|
||
|
|
my $freq = sprintf("%.2f", (split(/:/, $line[9]))[1] / (split(/:/, $line[9]))[7] * 100);
|
||
|
|
print($freq);
|
||
|
|
next if $freq < 0.3;
|
||
|
|
my $strand1 = &gene2strand($gene1);
|
||
|
|
my $strand2 = &gene2strand($gene2);
|
||
|
|
my $depth = &depth($line[0], $line[1], $line2[0], $line2[1]);
|
||
|
|
my $freq_bnd;
|
||
|
|
if ($depth) {$freq_bnd = sprintf("%.2f", (split(/:/, $line[9]))[1] / $depth * 100)}
|
||
|
|
else {$freq_bnd = "-"};
|
||
|
|
if ($line[4] =~ /^N\[/) {
|
||
|
|
if ($strand1 eq '+' and $strand2 eq '+') {
|
||
|
|
print OUT1 "$pt\t$line2[0]\t$line2[1]\t$line2[7]\t$gene1\t$gene2\t$gene1-$gene2\t$freq\t$freq_bnd\n";
|
||
|
|
}
|
||
|
|
elsif ($strand1 eq '-' and $strand2 eq '-') {
|
||
|
|
print OUT1 "$pt\t$line2[0]\t$line2[1]\t$line2[7]\t$gene2\t$gene1\t$gene2-$gene1\t$freq\t$freq_bnd\n";
|
||
|
|
}
|
||
|
|
else {
|
||
|
|
print($gene1, '.....\n')
|
||
|
|
}
|
||
|
|
}
|
||
|
|
elsif ($line[4] =~ /^N\]/) {
|
||
|
|
if ($strand1 eq '+' and $strand2 eq '-') {
|
||
|
|
print OUT1 "$pt\t$line2[0]\t$line2[1]\t$line2[7]\t$gene1\t$gene2\t$gene1-$gene2\t$freq\t$freq_bnd\n";
|
||
|
|
}
|
||
|
|
elsif ($strand1 eq '-' and $strand2 eq '+') {
|
||
|
|
print OUT1 "$pt\t$line2[0]\t$line2[1]\t$line2[7]\t$gene2\t$gene1\t$gene2-$gene1\t$freq\t$freq_bnd\n";
|
||
|
|
}
|
||
|
|
}
|
||
|
|
elsif ($line[4] =~ /\]N$/) {
|
||
|
|
if ($strand1 eq '+' and $strand2 eq '+') {
|
||
|
|
print OUT1 "$pt\t$line2[0]\t$line2[1]\t$line2[7]\t$gene2\t$gene1\t$gene2-$gene1\t$freq\t$freq_bnd\n";
|
||
|
|
}
|
||
|
|
elsif ($strand1 eq '-' and $strand2 eq '-') {
|
||
|
|
print OUT1 "$pt\t$line2[0]\t$line2[1]\t$line2[7]\t$gene1\t$gene2\t$gene1-$gene2\t$freq\t$freq_bnd\n";
|
||
|
|
}
|
||
|
|
}
|
||
|
|
elsif ($line[4] =~ /\[N$/) {
|
||
|
|
if ($strand1 eq '+' and $strand2 eq '-') {
|
||
|
|
print OUT1 "$pt\t$line2[0]\t$line2[1]\t$line2[7]\t$gene2\t$gene1\t$gene2-$gene1\t$freq\t$freq_bnd\n";
|
||
|
|
}
|
||
|
|
elsif ($strand1 eq '-' and $strand2 eq '+') {
|
||
|
|
print OUT1 "$pt\t$line2[0]\t$line2[1]\t$line2[7]\t$gene1\t$gene2\t$gene1-$gene2\t$freq\t$freq_bnd\n";
|
||
|
|
}
|
||
|
|
}
|
||
|
|
else {
|
||
|
|
print($gene1 'NNN.......\n')
|
||
|
|
}
|
||
|
|
}
|
||
|
|
else {
|
||
|
|
print($gene1, '#############\n')
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
sub site2gene {
|
||
|
|
my ($chr, $pos) = @_;
|
||
|
|
my %gene;
|
||
|
|
open REFGENE, "$public_path/hg19_refGene.txt";
|
||
|
|
my @gene;
|
||
|
|
while (<REFGENE>) {
|
||
|
|
my @line = split(/\t/, $_);
|
||
|
|
if ($chr eq $line[2] and $pos >= $line[4] and $pos <= $line[5]) {
|
||
|
|
$gene{$line[-4]}++;
|
||
|
|
push @gene, $line[-4] if $gene{$line[-4]} < 2;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
return join("|", @gene);
|
||
|
|
}
|
||
|
|
|
||
|
|
sub depth {
|
||
|
|
my ($chr1, $pos1, $chr2, $pos2) = @_;
|
||
|
|
my $depth1 = `less $depth|grep -w $chr1|grep -w $pos1`;
|
||
|
|
my $depth2 = `less $depth|grep -w $chr2|grep -w $pos2`;
|
||
|
|
$depth1 = (split(/\t/, $depth1))[3] if $depth1;
|
||
|
|
$depth2 = (split(/\t/, $depth2))[3] if $depth2;
|
||
|
|
my $depth;
|
||
|
|
if ($depth1 and $depth2) {
|
||
|
|
$depth = ($depth1 + $depth2) / 2
|
||
|
|
}
|
||
|
|
elsif ($depth1 and !$depth2) {
|
||
|
|
$depth = $depth1;
|
||
|
|
}
|
||
|
|
elsif (!$depth1 and $depth2) {
|
||
|
|
$depth = $depth2;
|
||
|
|
}
|
||
|
|
return $depth;
|
||
|
|
}
|
||
|
|
|
||
|
|
sub gene2strand {
|
||
|
|
my $gene = shift @_;
|
||
|
|
return $strand{$gene};
|
||
|
|
}
|