#!/usr/bin/perl use strict; use warnings; die "usage:perl $0 depth_file input out project" unless @ARGV == 4; my ($depth_file, $input, $out, $project) = @ARGV; my $public_path = defined $ENV{'PUBLIC'} ? $ENV{'PUBLIC'} : "/dataseq/jmdna/codes/public/"; print "Fusion过滤使用public路径:$public_path\n"; open IN, "$input"; open OUT1, "> $out"; open SD, "$public_path/gene_strand.txt"; my %strand; while () { chomp; my @line = split; $strand{$line[1]} = $line[0]; } my $fusion = info(); my @fusions = @$fusion; while () { if (/^##/) { next; } if (/^#CHROM/) { chomp; print OUT1 "$_\tCHROM2\tPOS2\tINFO2\tGENE1\tGENE2\tFUSION\tFREQ1\tFREQ2\tref_gene\n"; next; } chomp; my @line = split(/\t/); $line[7] =~ /SVTYPE=(.*?);/; my $SVTYPE = $1; if ($SVTYPE eq 'INV') { $line[7] =~ /SVLEN=(.*?);/; if ($1 > 1000) { my $freq_inv = sprintf("%.2f", (split(/:/, $line[9]))[1] / (split(/:/, $line[9]))[7] * 100); next if $freq_inv < 0.3; my $pt_inv = $_; my $gene1 = &site2gene($line[0], $line[1]); $line[7] =~ /END=(.*?);/; my $end = $1; my $gene2 = &site2gene($line[0], $end); # if (!((grep {$_ eq $gene1} @fusions) or (grep {$_ eq $gene2} @fusions))) { # next; # } my $ref_gene; if (grep {$_ eq $gene1} @fusions) { $ref_gene = $gene1; } if (grep {$_ eq $gene2} @fusions) { $ref_gene = $gene2; } if (!(defined $ref_gene)) { next; } my $depth = &depth($line[0], $line[1], $line[0], $end); my $freq_inv2; if (defined $depth) {$freq_inv2 = sprintf("%.2f", (split(/:/, $line[9]))[1] / $depth * 100)} else {$freq_inv2 = "-"}; print OUT1 "$pt_inv\t$line[0]\t$end\t-\t$gene1\t$gene2\t$gene1-$gene2\t$freq_inv\t$freq_inv2\t$ref_gene\n"; print OUT1 "$pt_inv\t$line[0]\t$end\t-\t$gene2\t$gene1\t$gene2-$gene1\t$freq_inv\t$freq_inv2\t$ref_gene\n"; next; } } elsif ($SVTYPE eq 'BND') { if ($line[2] =~ /(\d+)_1/) { $line[7] =~ /EVENT=(.*?);\S+Func.refGene=(.*?);Gene.refGene=(.*?);/; my ($event, $region1, $gene1) = ($1, $2, $3); my $pt = "$_"; my $mate = ; chomp $mate; my @line2 = split(/\t/, $mate); $line2[7] =~ /EVENT=(.*?);\S+Func.refGene=(.*?);Gene.refGene=(.*?);/; my ($event2, $region2, $gene2) = ($1, $2, $3); $gene1 = (split /\\x3b/, $gene1)[0]; $gene2 = (split /\\x3b/, $gene2)[0]; # if (!(grep {$_ eq $gene1} @fusions or grep {$_ eq $gene2} @fusions)) { # next; # } my $ref_gene; if (grep {$_ eq $gene1} @fusions) { $ref_gene = $gene1; } if (grep {$_ eq $gene2} @fusions) { $ref_gene = $gene2; } if (!(defined $ref_gene)) { next; } if ($event eq $event2 and $line2[2] =~ /_2/ and $gene1 ne $gene2 and $region1 !~ /upstream|downstream|intergenic|UTR/ and $region2 !~ /upstream|downstream|intergenic|UTR/) { my $freq = sprintf("%.2f", (split(/:/, $line[9]))[1] / (split(/:/, $line[9]))[7] * 100); next if $freq < 0.3; my $strand1 = &gene2strand($gene1); my $strand2 = &gene2strand($gene2); my $depth = &depth($line[0], $line[1], $line2[0], $line2[1]); my $freq_bnd; if (defined $depth) {$freq_bnd = sprintf("%.2f", (split(/:/, $line[9]))[1] / $depth * 100)} else {$freq_bnd = "-"}; if ($line[4] =~ /^N\[/) { if ($strand1 eq '+' and $strand2 eq '+') { print OUT1 "$pt\t$line2[0]\t$line2[1]\t$line2[7]\t$gene1\t$gene2\t$gene1-$gene2\t$freq\t$freq_bnd\t$ref_gene\n"; } elsif ($strand1 eq '-' and $strand2 eq '-') { print OUT1 "$pt\t$line2[0]\t$line2[1]\t$line2[7]\t$gene2\t$gene1\t$gene2-$gene1\t$freq\t$freq_bnd\t$ref_gene\n"; } else { print($gene1, '.....\n') } } elsif ($line[4] =~ /^N\]/) { if ($strand1 eq '+' and $strand2 eq '-') { print OUT1 "$pt\t$line2[0]\t$line2[1]\t$line2[7]\t$gene1\t$gene2\t$gene1-$gene2\t$freq\t$freq_bnd\t$ref_gene\n"; } elsif ($strand1 eq '-' and $strand2 eq '+') { print OUT1 "$pt\t$line2[0]\t$line2[1]\t$line2[7]\t$gene2\t$gene1\t$gene2-$gene1\t$freq\t$freq_bnd\t$ref_gene\n"; } } elsif ($line[4] =~ /\]N$/) { if ($strand1 eq '+' and $strand2 eq '+') { print OUT1 "$pt\t$line2[0]\t$line2[1]\t$line2[7]\t$gene2\t$gene1\t$gene2-$gene1\t$freq\t$freq_bnd\t$ref_gene\n"; } elsif ($strand1 eq '-' and $strand2 eq '-') { print OUT1 "$pt\t$line2[0]\t$line2[1]\t$line2[7]\t$gene1\t$gene2\t$gene1-$gene2\t$freq\t$freq_bnd\t$ref_gene\n"; } } elsif ($line[4] =~ /\[N$/) { if ($strand1 eq '+' and $strand2 eq '-') { print OUT1 "$pt\t$line2[0]\t$line2[1]\t$line2[7]\t$gene2\t$gene1\t$gene2-$gene1\t$freq\t$freq_bnd\t$ref_gene\n"; } elsif ($strand1 eq '-' and $strand2 eq '+') { print OUT1 "$pt\t$line2[0]\t$line2[1]\t$line2[7]\t$gene1\t$gene2\t$gene1-$gene2\t$freq\t$freq_bnd\t$ref_gene\n"; } } else { print($gene1 'NNN.......\n') } } } } } sub site2gene { my ($chr, $pos) = @_; my %gene; open REFGENE, "$public_path/hg19_refGene.txt"; my @gene; while () { my @line = split(/\t/, $_); if ($chr eq $line[2] and $pos >= $line[4] and $pos <= $line[5]) { $gene{$line[-4]}++; push @gene, $line[-4] if $gene{$line[-4]} < 2; } } return join("|", @gene); } sub depth { my ($chr1, $pos1, $chr2, $pos2) = @_; my $depth1 = `less $depth_file|grep -w $chr1|grep -w $pos1`; my $depth2 = `less $depth_file|grep -w $chr2|grep -w $pos2`; $depth1 = (split(/\t/, $depth1))[3] if defined $depth1; $depth2 = (split(/\t/, $depth2))[3] if defined $depth2; my $depth; if ($depth1 and $depth2) { $depth = ($depth1 + $depth2) / 2 } elsif ($depth1 and !$depth2) { $depth = $depth1; } elsif (!$depth1 and $depth2) { $depth = $depth2; } return $depth; } sub gene2strand { my $gene = shift @_; return $strand{$gene}; } sub info { open INFO, "$public_path/info.csv"; # 读取并解析表头 my $header = ; chomp($header); my @column_names = split(',', $header); my (@fusion); while () { chomp; my @line = split(/,/, $_); # 将数据与表头对应 my %record; @record{@column_names} = @line; if ($record{'project'} eq $project) { if ($record{'fusion'} ne "NA") { @fusion = split(/\//, $record{'fusion'}); } } } return \@fusion }