213 lines
7.7 KiB
Perl
Executable File
213 lines
7.7 KiB
Perl
Executable File
#!/usr/bin/env perl
|
|
use strict;
|
|
#use warnings;
|
|
use List::Util qw(sum);
|
|
|
|
# ---------------------------------------------------------------------------
# Command line and file handles.
#
# Exactly six positional arguments are required:
#   input         tab-separated variant table; its first line is a header
#   project       project identifier (matched against column 1 of info.txt)
#   sample_type   sample kind; the value 'c' enables an extra low-frequency
#                 filter in the main loop
#   somtic_out    output path written through OUT
#   germline_out  output path written through OUT2
#   tag_out       output path written through OUT3 (one tagged copy per row)
#
# Legacy interface (output_dir, tumor name, project, sample_type) derived the
# paths itself; the old names are kept here for reference:
#   input    $output_dir/mutation/${name}.snp.indel.anno.hg19_multianno.txt
#   somatic  ${name}.snp.indel.Somatic.annoall.hg19_multianno_filtered_pre.txt  (sample_type 'c')
#            ${name}.snp.indel.Somatic.annoall.hg19_multianno_filtered.txt      (sample_type 't')
#   germline ${name}.snp.indel.Germline.anno.hg19_multianno_filtered.txt
#   tag      ${name}.snp.indel.anno.hg19_multianno_tag.txt
# ---------------------------------------------------------------------------
die "useage:perl $0 input project sample_type somtic_out germline_out tag_out " unless @ARGV == 6;
my ($input, $project, $sample_type, $somtic_out, $germline_out, $tag_out) = @ARGV;

# Three-argument open keeps odd characters in a path (leading '>', trailing
# '|', ...) from being interpreted as a mode, and every failure is fatal so a
# bad path cannot silently yield empty result files.
# The bareword handle names are kept because the rest of the script uses them.
open(IN, '<', $input) or die "cannot open input '$input': $!\n";

# The header line is re-used, with one extra leading column, in every output.
my $head = <IN>;

open(OUT, '>', $somtic_out) or die "cannot write somatic output '$somtic_out': $!\n";
print OUT "可信\t$head";

open(OUT2, '>', $germline_out) or die "cannot write germline output '$germline_out': $!\n";
print OUT2 "临床意义\t$head";

open(OUT3, '>', $tag_out) or die "cannot write tag output '$tag_out': $!\n";
print OUT3 "TAG\t$head";
|
|
|
|
## black list
# Directory holding the shared reference tables; $PUBLIC overrides the default.
my $public_path = defined $ENV{'PUBLIC'} ? $ENV{'PUBLIC'} : "/dataseq/jmdna/codes/public/";

# %bk: set of blacklisted variants. Each key is the first five tab-separated
# columns of a blacklist.txt row joined with '_' (the same key the main loop
# builds from columns 0-4 of every input row).
my %bk;
{
    my $blacklist_file = "$public_path/blacklist.txt";
    open(my $blacklist_fh, '<', $blacklist_file)
        or die "cannot open blacklist '$blacklist_file': $!\n";

    # Skip the header row. The original read from the never-opened handle
    # BKLS here, so the header was not skipped and ended up as a bogus key.
    scalar <$blacklist_fh>;

    while (my $row = <$blacklist_fh>) {
        chomp $row;
        my @line = split("\t", $row);
        my $key = join("_", @line[0 .. 4]);
        $bk{$key} = 1;
    }
    close $blacklist_fh;
}
|
|
# blacklist($key)
#   $key  variant key in the same form as the keys of %bk.
# Returns the string "1" when the key is present in %bk and the empty
# string otherwise, so the result can be used directly as a boolean.
sub blacklist {
    my ($variant_key) = @_;
    return exists $bk{$variant_key} ? "1" : "";
}
|
|
|
|
# @muts: the gene names that are in scope for $project.
# info.txt is tab-separated; column 1 is the project identifier and column 3
# holds a '/'-separated gene list, or the literal "NA" when there is none.
# When several rows match the project, the last row with a non-"NA" list wins.
my @muts;
{
    my $info_file = "$public_path/info.txt";

    # Without this table every variant would be tagged nontarget_gene, so a
    # missing or unreadable file is treated as fatal rather than ignored.
    open(my $info_fh, '<', $info_file)
        or die "cannot open project table '$info_file': $!\n";

    while (my $row = <$info_fh>) {
        chomp $row;
        my @line = split(/\t/, $row);
        next unless $line[0] eq $project;
        next if $line[2] eq "NA";
        @muts = split(/\//, $line[2]);
    }
    close $info_fh;
}
|
|
|
|
# ---------------------------------------------------------------------------
# Main pass: classify every remaining row of the input table.
#
# Every row is written to the tag file (OUT3) prefixed with exactly one tag:
#   synonymous, common_snp, benign, cfdna_lowfreq_cosmic, blacklist,
#   nontarget_gene, byfilter, unknow1, unknow2 or PASS.
# Rows tagged PASS are additionally written to OUT with a leading "1", and,
# when $freq exceeds 0.1, to OUT2 with a leading tier of 1, 2 or 3.
#
# Column indices are 0-based.
# NOTE(review): the column meanings given below are inferred from how each
# column is used in this loop, not from a visible schema - confirm them
# against the header of the input table.
# ---------------------------------------------------------------------------
my $COL_FUNC        = 5;      # matched against /splicing/
my $COL_GENE        = 6;      # ';'-separated gene names
my $COL_GENE_DETAIL = 7;      # ';'-separated "transcript:exonN:c.change" entries
my $COL_EXONIC_FUNC = 8;      # e.g. "synonymous SNV", "unknown"
my $COL_AA_CHANGE   = 9;      # ','-separated "gene:transcript:exonN:c.X:p.Y", or '.'
my $COL_COSMIC      = 11;     # contains "OCCURENCE=n(site),..." or is '.'
my $COL_CLNSIG      = 16;     # clinical-significance text
my $COL_FILTER      = 101;    # 'PASS' or ';'-separated filter flags
my @POP_FREQ_COLS   = (17, 18, 19, 20, 23, 28, 32);    # presumably population frequencies
my $MAX_POP_FREQ    = 0.01;

# Write one row to the tag file, prefixed with its tag.
sub _tag_row {
    my ($tag, @fields) = @_;
    print OUT3 "$tag\t", join("\t", @fields), "\n";
    return;
}

# Map the clinical-significance text to the tier written to OUT2:
# 2 for "Likely_pathogenic" or "drug", 1 for non-conflicting "pathogenic",
# 3 for everything else.
sub _germline_tier {
    my ($clnsig) = @_;
    return 2 if $clnsig =~ /Likely_pathogenic|drug/i;
    return 1 if $clnsig =~ /pathogenic/i and $clnsig !~ /Conflicting/i;
    return 3;
}

# Pick the description to report for $gene out of @candidates: the first one
# that mentions the transcript returned by transcript($gene), otherwise the
# first candidate. The transcript id is quoted and must not be followed by a
# digit, so "NM_001126" can no longer match "NM_001126112".
sub _preferred_hgvs {
    my ($gene, @candidates) = @_;
    my $chosen = $candidates[0];
    if (my $transcript = transcript($gene)) {
        my ($hit) = grep { /\Q$transcript\E(?!\d)/ } @candidates;
        $chosen = $hit if defined $hit;
    }
    return $chosen;
}

ROW: while (my $row = <IN>) {
    chomp $row;
    my @line = split(/\t/, $row);

    # Fifth ':'-separated field of the last column.
    # NOTE(review): presumably the variant allele fraction as a plain
    # fraction (0-1); confirm it is not written as a percentage.
    my $freq = (split(":", $line[-1]))[4];

    # 1. Synonymous / unknown exonic function.
    if ($line[$COL_EXONIC_FUNC] eq "synonymous SNV" or $line[$COL_EXONIC_FUNC] eq "unknown") {
        _tag_row("synonymous", @line);
        next ROW;
    }

    # 2. Any of the frequency columns at or above the threshold.
    #    A '.' placeholder compares as 0 and therefore passes.
    if (grep { !($line[$_] < $MAX_POP_FREQ) } @POP_FREQ_COLS) {
        _tag_row("common_snp", @line);
        next ROW;
    }

    # 3. Unambiguously benign clinical significance.
    my $clnsig = $line[$COL_CLNSIG];
    if ($clnsig =~ /benign/i and $clnsig !~ /pathogenic|Affects|association|Conflicting|sensitivity|drug|other|risk|protective|Uncertain|not_provided|\./i) {
        _tag_row("benign", @line);
        next ROW;
    }

    # 4. Sample type 'c' only: drop low-frequency calls that have no COSMIC
    #    entry or at most one summed COSMIC occurrence.
    if ($sample_type eq 'c') {
        if ($line[$COL_COSMIC] =~ /OCCURENCE=(\S+)/) {
            my $occurrences = $1;
            $occurrences =~ s/\(\S+?\)//g;    # strip the "(site)" suffixes
            my $total = sum(split(",", $occurrences));
            if ($freq < 0.01 and $total <= 1) {
                _tag_row("cfdna_lowfreq_cosmic", @line);
                next ROW;
            }
        }
        if ($freq < 0.01 and $line[$COL_COSMIC] eq '.') {
            _tag_row("cfdna_lowfreq_cosmic", @line);
            next ROW;
        }
    }

    # 5. Blacklisted variant (key = first five columns joined with '_').
    if (blacklist(join("_", @line[0 .. 4]))) {
        _tag_row("blacklist", @line);
        next ROW;
    }

    # 6a. Row with an amino-acid change description.
    if ($line[$COL_AA_CHANGE] ne '.') {
        my @hgvs = split(/,/, $line[$COL_AA_CHANGE]);

        # Take the gene from the first description. The capture is only used
        # when the match succeeded; a description that does not fit the
        # expected shape is treated as a non-target gene.
        my $gene;
        if (defined $hgvs[0] and $hgvs[0] =~ /(\S+):(\S+):exon(\d+):c\.(\S+):p\.(\S+)$/) {
            $gene = $1;
        }
        unless (defined $gene and grep { $gene eq $_ } @muts) {
            _tag_row("nontarget_gene", @line);
            next ROW;
        }

        # Non-PASS calls are dropped below 0.02, and below 0.05 when at
        # least two filter flags are set.
        if ($line[$COL_FILTER] ne 'PASS') {
            my @flags = split(";", $line[$COL_FILTER]);
            if ($freq < 0.02 or ($freq < 0.05 and @flags >= 2)) {
                _tag_row("byfilter", @line);
                next ROW;
            }
        }

        $line[$COL_AA_CHANGE] = _preferred_hgvs($gene, @hgvs);

        # Reported row: columns 5 and 6 are replaced by "exonic" and the gene.
        my $report = join("\t", (@line[0 .. 4], "exonic", $gene, @line[$COL_GENE_DETAIL .. $#line]));
        print OUT "1\t$report\n";
        _tag_row("PASS", @line);

        if ($freq > 0.1) {
            my $tier = _germline_tier($line[$COL_CLNSIG]);
            print OUT2 "$tier\t$report\n";
        }
        next ROW;
    }

    # 6b. Splicing row (the FILTER column is deliberately not consulted here).
    if ($line[$COL_FUNC] =~ /splicing/) {
        my $gene = (split(";", $line[$COL_GENE]))[0];
        unless (grep { $gene eq $_ } @muts) {
            _tag_row("nontarget_gene", @line);
            next ROW;
        }

        my $hgvs = _preferred_hgvs($gene, split(/;/, $line[$COL_GENE_DETAIL]));

        # Keep only changes at intronic offset +/-1 or +/-2, e.g. "123+1G>A".
        # The original classes "[\+|\-]" and "[1|2]" also accepted a literal
        # '|'; the capture is now used only after a successful match.
        my $splice_change;
        if (defined $hgvs and $hgvs =~ /(\S+):exon(\d+):c\.(\S+)$/) {
            $splice_change = $3;
        }
        unless (defined $splice_change and $splice_change =~ /\d+[+-][12]\D+/) {
            _tag_row("unknow1", @line);
            next ROW;
        }

        $line[$COL_GENE_DETAIL] = join(":", ($gene, $hgvs));
        _tag_row("PASS", @line);

        # Reported row: columns 5-8 become "splicing", gene, '.', '.', and the
        # gene-qualified description takes the place of column 9.
        my $report = join("\t", (@line[0 .. 4], "splicing", $gene, '.', '.', @line[$COL_GENE_DETAIL, 10 .. $#line]));
        print OUT "1\t$report\n";

        if ($freq > 0.1) {
            my $tier = _germline_tier($line[$COL_CLNSIG]);
            print OUT2 "$tier\t$report\n";
        }
        next ROW;
    }

    # 6c. Neither an amino-acid change nor a splicing annotation.
    _tag_row("unknow2", @line);
}

# Buffered write errors (disk full, ...) only surface at close time.
close(IN);
close(OUT)  or die "error closing '$somtic_out': $!\n";
close(OUT2) or die "error closing '$germline_out': $!\n";
close(OUT3) or die "error closing '$tag_out': $!\n";
|
|
|
|
# transcript($gene)
#   $gene  gene name, looked up in column 1 of oncokbgene.txt.
#
# Returns the value of column 3 of the matching row with its first ".<digits>"
# part (the version suffix of an id such as "NM_000546.5") removed. When the
# gene is not listed, a note is printed to STDOUT and the empty string is
# returned.
#
# oncokbgene.txt is whitespace-separated and is read from $DATABASE, or from
# the built-in default directory when that variable is unset. The table is
# parsed once, on the first call, and cached; the original re-read the file
# on every call. If the file cannot be opened a warning is issued once and
# every lookup behaves as "gene not listed", which matches the previous
# behaviour apart from the warning.
sub transcript {
    my $gene = shift @_;

    # Package variables so the cache survives between calls without relying
    # on the position of this sub relative to the code that calls it.
    our (%ONCOKB_TRANSCRIPT_OF, $ONCOKB_LOADED);

    unless ($ONCOKB_LOADED) {
        $ONCOKB_LOADED = 1;
        my $data_path = defined $ENV{'DATABASE'} ? $ENV{'DATABASE'} : "/dataseq/jmdna/codes/reportbase/";
        my $table     = "$data_path/oncokbgene.txt";

        if (open(my $table_fh, '<', $table)) {
            while (my $row = <$table_fh>) {
                chomp $row;
                my @line = split ' ', $row;
                my $nm_id = $line[2];
                $nm_id =~ s/\.\d+// if defined $nm_id;
                # Later rows for the same gene overwrite earlier ones.
                $ONCOKB_TRANSCRIPT_OF{$line[0]} = $nm_id;
            }
            close $table_fh;
        }
        else {
            warn "cannot open transcript table '$table': $!\n";
        }
    }

    if (exists $ONCOKB_TRANSCRIPT_OF{$gene}) {
        return $ONCOKB_TRANSCRIPT_OF{$gene};
    }

    print "$gene has no NM id in oncokbgene.txt";
    return "";
}
|