pipeline/script/hotspot.filter.pl

72 lines
1.7 KiB
Perl
Executable File

#!/usr/bin/perl
use strict;
use warnings;
die "usage:perl $0 outputDir tumor" if @ARGV!=2;
my (${outputDir},${tumor})=@ARGV;
open IN,"${outputDir}/mutation/hotspot/${tumor}.hotspot.snp.indel.anno.hg19_multianno.txt";
open OUT,">${outputDir}/mutation/hotspot/${tumor}.hotspot.snp.indel.anno.hg19_multianno_filter.txt";
open HT,"/dataseq/jmdna/codes/public/hotspot.bed";
my %ht;
while(<HT>){
chomp;
my @line=split;
$ht{$line[3]}=1;
}
my $head=<IN>;
my @ht;
while(<IN>){
chomp;
my @line=split(/\t/);
next if $line[8] eq 'synonymous SNV';
my @hgvs=split(/,/,$line[9]);
my $hgvs=$hgvs[0];
if (my $transcript=&transcript($line[6])){
if(grep{/$transcript/}@hgvs){
$hgvs=(grep{/$transcript/}@hgvs)[0];
}
}
$line[9]=$hgvs;
$hgvs=~/(\S+):(\S+):(\S+):c\.(\S+):p\.(\S+)/;
my($gene,$nm,$exon,$c,$p)=($1,$2,$3,$4,$5);
my $key1=$gene."_".$p;
my ($key2,$key3)=(1,1);
if($p=~/del/){
$key2=$gene."_".$exon."_del";
}elsif($p=~/ins/){
$key2=$gene."_".$exon."_ins";
}elsif($p=~/^(\S\d+)\S/){
$key3=$gene."_".$1;
}
if(exists $ht{$key1} or exists $ht{$key2} or exists $ht{$key3}){
push @ht,join("\t",@line);
}
}
if(@ht){
print OUT "$head";
print OUT join("\n",@ht);
}
sub transcript{
my $gene=shift @_;
open TR,"/dataseq/jmdna/codes/reportbase/oncokbgene.txt";
my %oncogene;
while(<TR>){
chomp;
my @line=split;
$oncogene{$line[0]}=$line[2];
}
if (exists $oncogene{$gene}){
$oncogene{$gene}=~s/\.\d+//;
return $oncogene{$gene};
}else{
print "$gene has no NM id in oncokbgene.txt";
return "";
}
}