pipeline/script/pick_variant.pl

90 lines
2.8 KiB
Perl

#!/usr/bin/perl
use strict;
#use warnings;
# ---------------------------------------------------------------------------
# Command line: perl pick_variant.pl <output_dir> <name>
#
# Somatic pass.
# Reads   <output_dir>/mutation/<name>.snp.indel.somatic.anno.hg19_multianno.txt
# Writes  <output_dir>/mutation/<name>.snp.indel.Somatic.anno.hg19_multianno_filtered.txt
#
# The first input line is copied through as the header with one extra leading
# column.  Every kept row is written with "1" in that leading column, column 5
# replaced by the literal "exonic", column 6 replaced by the gene symbol and
# column 9 reduced to a single HGVS entry.
# NOTE(review): the column indexes used below look like an ANNOVAR multianno
# layout (8 = exonic function, 9 = AAChange, 17-32 = population frequencies);
# confirm against the header of the real input.
# ---------------------------------------------------------------------------
my ($output_dir,$name)=@ARGV;
die "usage: perl $0 output_dir tumor\n" unless @ARGV==2;

my $somatic_in_path="$output_dir/mutation/${name}.snp.indel.somatic.anno.hg19_multianno.txt";
my $somatic_out_path="$output_dir/mutation/${name}.snp.indel.Somatic.anno.hg19_multianno_filtered.txt";

# Fail loudly: an unreadable input used to produce a header-only result file.
open(my $somatic_in_fh,'<',$somatic_in_path)
    or die "cannot read $somatic_in_path: $!\n";
open(my $somatic_out_fh,'>',$somatic_out_path)
    or die "cannot write $somatic_out_path: $!\n";

my $somatic_head=<$somatic_in_fh> // '';
print {$somatic_out_fh} "可信\t$somatic_head";   # header label means "credible"

while(my $row=<$somatic_in_fh>){
    chomp $row;
    my @line=split(/\t/,$row);

    # Rows without any amino-acid change annotation are dropped.
    next if $line[9] eq '.';

    # Fifth-from-last ':' field of the second-to-last column, '%' removed.
    # Presumably the variant allele frequency in percent -- TODO confirm.
    my $freq=(split(":",$line[-2]))[-5];
    my $FREQ=($freq // '')=~s/%//r;

    # Column 9 is a comma-separated list of HGVS entries; start from the first.
    my @hgvs=split(/,/,$line[9]);
    my $hgvs=$hgvs[0];

    # Take the gene symbol from the HGVS entry.  When the entry does not have
    # the gene:transcript:exonN:c.X:p.Y shape, keep the value column 6 already
    # holds instead of reading a capture variable that was never set by this
    # match.
    my $gene=$hgvs=~/(\S+):(\S+):exon(\d+):c\.(\S+):p\.(\S+)$/ ? $1 : $line[6];

    # Drop synonymous / unknown changes.
    next if $line[8] eq "synonymous SNV" or $line[8] eq "unknown";

    # Every one of these columns must be numerically below 0.01.  A "."
    # placeholder numifies to 0 and therefore passes.
    next if grep { !($line[$_]<0.01) } (17,18,19,20,23,28,32);

    # Rows whose last column mentions STR are kept only when REF or ALT is at
    # least 4 characters long or the frequency is above 5.
    next unless ($line[$#line]!~/STR/
        or length($line[3])>=4 or length($line[4])>=4
        or $FREQ>5);

    # Prefer the HGVS entry on the transcript listed for this gene.  The
    # transcript ID is matched literally and must not be followed by another
    # digit, so NM_001126 cannot select an NM_001126112 entry.
    if(my $transcript=transcript($gene)){
        my ($preferred)=grep { /\Q$transcript\E(?!\d)/ } @hgvs;
        $hgvs=$preferred if defined $preferred;
    }
    $line[9]=$hgvs;

    print {$somatic_out_fh} "1\t",join("\t",(@line[0..4],"exonic",$gene,@line[7..$#line])),"\n";
}

close($somatic_in_fh);
# Buffered write errors only surface at close time.
close($somatic_out_fh) or die "cannot finish writing $somatic_out_path: $!\n";
##$line[100] eq 'PASS'
# ---------------------------------------------------------------------------
# Germline pass.
# Reads   <output_dir>/mutation/<name>.snp.indel.germline.anno.hg19_multianno.txt
# Writes  <output_dir>/mutation/<name>.snp.indel.Germline.anno.hg19_multianno_filtered.txt
#
# The first input line is copied through as the header with one extra leading
# column.  Every kept row gets a tier in that leading column:
#   2 - column 16 matches Likely_pathogenic or drug
#   1 - column 16 matches pathogenic and does not match Conflicting
#   3 - everything else that passed the filter
# NOTE(review): column 16 looks like a ClinVar significance field and columns
# 18-32 like population frequencies; confirm against the real input header.
# ---------------------------------------------------------------------------
my $germline_in_path="$output_dir/mutation/${name}.snp.indel.germline.anno.hg19_multianno.txt";
my $germline_out_path="$output_dir/mutation/${name}.snp.indel.Germline.anno.hg19_multianno_filtered.txt";

# Fail loudly: an unreadable input used to produce a header-only result file.
open(my $germline_in_fh,'<',$germline_in_path)
    or die "cannot read $germline_in_path: $!\n";
open(my $germline_out_fh,'>',$germline_out_path)
    or die "cannot write $germline_out_path: $!\n";

my $germline_head=<$germline_in_fh> // '';
print {$germline_out_fh} "临床意义\t$germline_head";   # header label means "clinical significance"

while(my $row=<$germline_in_fh>){
    chomp $row;
    my @line=split(/\t/,$row);

    # Rows without any amino-acid change annotation are dropped.
    next if $line[9] eq '.';

    # Second-to-last ':' field of the second-to-last column, '%' removed.
    # Presumably the variant allele frequency in percent -- TODO confirm.
    my $freq=(split(":",$line[-2]))[-2];
    my $FREQ=($freq // '')=~s/%//r;
    next if ($FREQ<10);

    # Column 9 is a comma-separated list of HGVS entries; start from the first.
    my @hgvs=split(/,/,$line[9]);
    my $hgvs=$hgvs[0];

    # Take the gene symbol from the HGVS entry.  When the entry does not have
    # the gene:transcript:exonN:c.X:p.Y shape, keep the value column 6 already
    # holds instead of reading a capture variable that was never set by this
    # match.
    my $gene=$hgvs=~/(\S+):(\S+):exon(\d+):c\.(\S+):p\.(\S+)$/ ? $1 : $line[6];

    # Drop synonymous / unknown changes.
    next if $line[8] eq "synonymous SNV" or $line[8] eq "unknown";

    # Every one of these columns must be numerically below 0.01.  A "."
    # placeholder numifies to 0 and therefore passes.
    next if grep { !($line[$_]<0.01) } (18,19,20,23,28,32);

    # Column 16 must carry one of the accepted annotations (or a literal ".").
    next unless $line[16]=~/pathogenic|Affects|association|Conflicting|sensitivity|drug|other|risk|protective|Uncertain|not_provided|\./i;

    # Prefer the HGVS entry on the transcript listed for this gene.  The
    # transcript ID is matched literally and must not be followed by another
    # digit, so NM_001126 cannot select an NM_001126112 entry.
    if(my $transcript=transcript($gene)){
        my ($preferred)=grep { /\Q$transcript\E(?!\d)/ } @hgvs;
        $hgvs=$preferred if defined $preferred;
    }
    $line[9]=$hgvs;

    # Tier order matters: "Likely_pathogenic" also matches /pathogenic/, so it
    # has to be tested first.
    my $tier;
    if($line[16]=~/Likely_pathogenic|drug/i){
        $tier=2;
    }elsif($line[16]=~/pathogenic/i and $line[16]!~/Conflicting/i){
        $tier=1;
    }else{
        $tier=3;
    }
    print {$germline_out_fh} "$tier\t",join("\t",(@line[0..4],"exonic",$gene,@line[7..$#line])),"\n";
}

close($germline_in_fh);
# Buffered write errors only surface at close time.
close($germline_out_fh) or die "cannot finish writing $germline_out_path: $!\n";
# transcript($gene [, $table_path])
#
# Look up the transcript ID recorded for $gene in a whitespace-separated
# table whose first column is the gene and whose third column is the
# transcript ID.
#
#   $gene        gene symbol to look up
#   $table_path  optional table file; defaults to the pipeline's
#                oncokbgene.txt
#
# Returns the third-column value with its first ".<digits>" part removed
# (e.g. "NM_000546.5" -> "NM_000546").  When the gene is not listed, or its
# row has no third column, a note is printed to STDOUT and "" is returned.
#
# Each table is parsed once per process and cached by path; the sub used to
# re-read the whole file for every variant row.  An unreadable table is
# reported once on STDERR and then treated as empty, so the caller's
# best-effort behaviour (fall back to the first HGVS entry) is preserved.
sub transcript{
    use feature 'state';
    my ($gene,$table_path)=@_;
    $table_path //= "/dataseq/jmdna/codes/reportbase/oncokbgene.txt";

    state $tables={};    # table path => { gene => third column }

    my $transcript_of=$tables->{$table_path};
    if(!$transcript_of){
        $transcript_of=$tables->{$table_path}={};
        if(open(my $table_fh,'<',$table_path)){
            # Lexical line variable: the caller's $_ is left alone.
            while(my $row=<$table_fh>){
                my @field=split(' ',$row);
                # Later rows for the same gene override earlier ones.
                $transcript_of->{$field[0]}=$field[2] if defined $field[2];
            }
            close($table_fh);
        }else{
            warn "cannot read $table_path: $!\n";
        }
    }

    if(defined $gene and exists $transcript_of->{$gene}){
        # Strip the version suffix without modifying the cached value.
        return $transcript_of->{$gene}=~s/\.\d+//r;
    }

    $gene //= '';
    print "$gene has no NM id in oncokbgene.txt\n";
    return "";
}