#!/usr/bin/perl use strict; use warnings; die "usage:perl $0 depth_file outputDir name" unless @ARGV==3; open SD,"/dataseq/jmdna/codes/pancancer_controlsample/gene_strand.txt"; my %strand; while(){ chomp; my @line=split; $strand{$line[1]}=$line[0]; } my($depth,$outputDir,$name)=@ARGV; open IN,"$outputDir/fusion/${name}.fusion.hg19_multianno.vcf"; open OUT1,">$outputDir/fusion/${name}.fusion.reanno.vcf"; open OUT2, ">$outputDir/fusion/${name}.longindel.vcf"; while(){ if(/^##/){ print OUT1; print OUT2; next; } if(/^#CHROM/){ chomp; print OUT1 "$_\tCHROM2\tPOS2\tINFO2\tGENE1\tGENE2\tFUSION\tFREQ1\tFREQ2\n"; print OUT2 "$_\n"; next; } chomp; my @line=split(/\t/); $line[7]=~/SVTYPE=(.*?);/; my $SVTYPE=$1; if($SVTYPE eq 'DEL' or $SVTYPE eq 'DUP' or $SVTYPE eq 'INS'){ print OUT2 "$_\n"; next; }elsif($SVTYPE eq 'INV'){ $line[7]=~/SVLEN=(.*?);/; if($1>1000){ my $pt_inv=$_; my $gene1=&site2gene($line[0],$line[1]); $line[7]=~/END=(.*?);/; my $end=$1; my $gene2=&site2gene($line[0],$end); my $freq_inv=sprintf("%.2f",(split(/:/,$line[9]))[1]/(split(/:/,$line[9]))[7]*100); my $depth=&depth($line[0],$line[1],$line[0],$end); my $freq_inv2; if($depth){$freq_inv2=sprintf("%.2f",(split(/:/,$line[9]))[1]/$depth*100)}else{$freq_inv2="-"}; print OUT1"$pt_inv\t$line[0]\t$end\t-\t$gene1\t$gene2\t$gene1-$gene2\t$freq_inv\t$freq_inv2\n"; print OUT1"$pt_inv\t$line[0]\t$end\t-\t$gene2\t$gene1\t$gene2-$gene1\t$freq_inv\t$freq_inv2\n"; next; } }elsif($SVTYPE eq 'BND'){ if($line[2]=~/(\d+)_1/){ $line[7]=~/EVENT=(.*?);\S+Func.refGene=(.*?);Gene.refGene=(.*?);/; my ($event,$region1,$gene1)=($1,$2,$3); my $pt="$_"; my $mate=; chomp $mate; my @line2=split(/\t/,$mate); $line2[7]=~/EVENT=(.*?);\S+Func.refGene=(.*?);Gene.refGene=(.*?);/; my ($event2,$region2,$gene2)=($1,$2,$3); if($event eq $event2 and $line2[2]=~/_2/ and $gene1 ne $gene2 and ($region1 !~ /upstream|downstream|intergenic/ or $region2 !~ /upstream|downstream|intergenic/)){ my $strand1=&gene2strand($gene1); my $strand2=&gene2strand($gene2); my $freq=sprintf("%.2f",(split(/:/,$line[9]))[1]/(split(/:/,$line[9]))[7]*100); my $depth=&depth($line[0],$line[1],$line2[0],$line2[1]); my $freq_bnd; if ($depth){$freq_bnd=sprintf("%.2f",(split(/:/,$line[9]))[1]/$depth*100)}else{$freq_bnd="-"}; if($line[4]=~/^N\[/){ if($strand1 eq '+' and $strand2 eq '+'){ print OUT1 "$pt\t$line2[0]\t$line2[1]\t$line2[7]\t$gene1\t$gene2\t$gene1-$gene2\t$freq\t$freq_bnd\n"; }elsif($strand1 eq '-' and $strand2 eq '-'){ print OUT1 "$pt\t$line2[0]\t$line2[1]\t$line2[7]\t$gene2\t$gene1\t$gene2-$gene1\t$freq\t$freq_bnd\n"; } }elsif($line[4]=~/^N\]/){ if($strand1 eq '+' and $strand2 eq '-'){ print OUT1 "$pt\t$line2[0]\t$line2[1]\t$line2[7]\t$gene1\t$gene2\t$gene1-$gene2\t$freq\t$freq_bnd\n"; }elsif($strand1 eq '-' and $strand2 eq '+'){ print OUT1 "$pt\t$line2[0]\t$line2[1]\t$line2[7]\t$gene2\t$gene1\t$gene2-$gene1\t$freq\t$freq_bnd\n"; } }elsif($line[4]=~/\]N$/){ if($strand1 eq '+' and $strand2 eq '+'){ print OUT1 "$pt\t$line2[0]\t$line2[1]\t$line2[7]\t$gene2\t$gene1\t$gene2-$gene1\t$freq\t$freq_bnd\n"; }elsif($strand1 eq '-' and $strand2 eq '-'){ print OUT1 "$pt\t$line2[0]\t$line2[1]\t$line2[7]\t$gene1\t$gene2\t$gene1-$gene2\t$freq\t$freq_bnd\n"; } }elsif($line[4]=~/\[N$/){ if($strand1 eq '+' and $strand2 eq '-'){ print OUT1 "$pt\t$line2[0]\t$line2[1]\t$line2[7]\t$gene2\t$gene1\t$gene2-$gene1\t$freq\t$freq_bnd\n"; }elsif($strand1 eq '-' and $strand2 eq '+'){ print OUT1 "$pt\t$line2[0]\t$line2[1]\t$line2[7]\t$gene1\t$gene2\t$gene1-$gene2\t$freq\t$freq_bnd\n"; } } } } } } sub site2gene{ my ($chr,$pos)=@_; my %gene; open REFGENE,"/dataseq/jmdna/software/annovar/humandb/hg19_refGene.txt"; my @gene; while(){ my @line=split(/\t/,$_); if ($chr eq $line[2] and $pos>=$line[4] and $pos<=$line[5]){ $gene{$line[-4]}++; push @gene,$line[-4] if $gene{$line[-4]}<2; } } return join("|",@gene); } sub depth{ my($chr1,$pos1,$chr2,$pos2)=@_; my $depth1=`less $depth|grep -w $chr1|grep -w $pos1`; my $depth2=`less $depth|grep -w $chr2|grep -w $pos2`; $depth1=(split(/\t/,$depth1))[3] if $depth1; $depth2=(split(/\t/,$depth2))[3] if $depth2; my $depth; if($depth1 and $depth2){ $depth=($depth1+$depth2)/2 }elsif($depth1 and !$depth2){ $depth=$depth1; }elsif(!$depth1 and $depth2){ $depth=$depth2; } return $depth; } sub gene2strand{ my $gene=shift @_; return $strand{$gene}; }