73 lines
2.2 KiB
Perl
Executable File
73 lines
2.2 KiB
Perl
Executable File
#!/usr/bin/perl
|
|
use strict;
|
|
#use warnings;
|
|
|
|
my ($output_dir,$tumor)=@ARGV;
|
|
die "useage:perl $0 output_dir tumor" unless @ARGV==2;
|
|
|
|
open IN,"$output_dir/mutation/${tumor}.snp.indel.Somatic.annoall.hg19_multianno.txt";
|
|
<IN>;
|
|
open OUT,">${output_dir}/mutation/${tumor}.tmb.txt";
|
|
print OUT "可信\tChr\tStart\tEnd\tRef\tAlt\tExonicFunc\tTranscript\tGene\tBasechange\tAAchange\tfreq\tTotal_reads\tMutant_reads\tStrand_bias\tCOSMICID\tdbSNP\tFilter\tTSG/OG\n";
|
|
|
|
##TSG_OG
|
|
my (%tsg,%og);
|
|
open TSG,"/dataseq/jmdna/codes/reportbase/snv_indel_mutation.csv";
|
|
<TSG>;
|
|
while(<TSG>){
|
|
chomp;
|
|
my @line=split(",");
|
|
if($line[-1] eq 'Yes'){$tsg{$line[0]}=1;}
|
|
if($line[-2] eq 'Yes'){$og{$line[0]}=1;}
|
|
}
|
|
|
|
|
|
|
|
while(<IN>){
|
|
chomp;
|
|
my @line=split(/\t/,$_);
|
|
my $freq=(split(":",$line[-2]))[-2];
|
|
my $FREQ=$freq=~s/%//r;
|
|
if($FREQ>=5 and $line[5]=~/exonic/ and $line[5]!~/ncRNA/ and $line[8] ne "unknown" and $line[17]<0.01 and ($line[$#line]!~/STR/ or (length($line[3])>=4 or length($line[4])>=4) or
|
|
$FREQ>5) and $line[18]<0.01 and $line[19]<0.01 and $line[20]<0.01 and $line[23]<0.01 and $line[28]<0.01 and $line[32]<0.01){
|
|
my @hgvs=split(/,/,$line[9]);
|
|
my $hgvs=$hgvs[0];
|
|
if (my $transcript=&transcript($line[6])){
|
|
if(grep{/$transcript/}@hgvs){
|
|
$hgvs=(grep{/$transcript/}@hgvs)[0];
|
|
}
|
|
}
|
|
$line[9]=$hgvs;
|
|
$hgvs=~/(NM.*):exon\d+:(c.*?):(p\..*)/;
|
|
my($transcript,$Basechange,$protein)=($1,$2,$3);
|
|
my $COSMICID=(split(";",$line[11]))[0];
|
|
if ($COSMICID=~/\./){
|
|
$COSMICID="/";
|
|
}
|
|
my($dp,$vd,$vfr)=(split(":",$line[-2]))[2,4,-1];
|
|
$vfr=join(",",(split(",",$vfr))[2,3]);
|
|
my $tsg=(exists $tsg{$line[6]} and exists $og{$line[6]})?"TSG/OG":(exists $tsg{$line[6]})?'TSG':(exists $og{$line[6]})?'OG':'.';
|
|
my @anno=('1',@line[0,1,2,3,4,8],$transcript,$line[6],$Basechange,$protein,$freq,$dp,$vd,$vfr,$COSMICID,$line[10],$line[101],$tsg);
|
|
print OUT join("\t",@anno),"\n";
|
|
}
|
|
}
|
|
|
|
|
|
sub transcript{
|
|
my $gene=shift @_;
|
|
open TR,"/dataseq/jmdna/codes/reportbase/oncokbgene.txt";
|
|
my %oncogene;
|
|
while(<TR>){
|
|
chomp;
|
|
my @line=split;
|
|
$oncogene{$line[0]}=$line[2];
|
|
}
|
|
if (exists $oncogene{$gene}){
|
|
$oncogene{$gene}=~s/\.\d+//;
|
|
return $oncogene{$gene};
|
|
}else{
|
|
print "$gene has no NM id in oncokbgene.txt";
|
|
return "";
|
|
}
|
|
}
|