pipeline/script/tmb.pl

73 lines
2.2 KiB
Perl
Raw Normal View History

2023-08-25 10:06:31 +08:00
#!/usr/bin/perl
use strict;
#use warnings;
my ($output_dir,$tumor)=@ARGV;
die "useage:perl $0 output_dir tumor" unless @ARGV==2;
open IN,"$output_dir/mutation/${tumor}.snp.indel.Somatic.annoall.hg19_multianno.txt";
<IN>;
open OUT,">${output_dir}/mutation/${tumor}.tmb.txt";
print OUT "可信\tChr\tStart\tEnd\tRef\tAlt\tExonicFunc\tTranscript\tGene\tBasechange\tAAchange\tfreq\tTotal_reads\tMutant_reads\tStrand_bias\tCOSMICID\tdbSNP\tFilter\tTSG/OG\n";
##TSG_OG
my (%tsg,%og);
open TSG,"/dataseq/jmdna/codes/reportbase/snv_indel_mutation.csv";
<TSG>;
while(<TSG>){
chomp;
my @line=split(",");
if($line[-1] eq 'Yes'){$tsg{$line[0]}=1;}
if($line[-2] eq 'Yes'){$og{$line[0]}=1;}
}
while(<IN>){
chomp;
my @line=split(/\t/,$_);
my $freq=(split(":",$line[-2]))[-2];
my $FREQ=$freq=~s/%//r;
if($FREQ>=5 and $line[5]=~/exonic/ and $line[5]!~/ncRNA/ and $line[8] ne "unknown" and $line[17]<0.01 and ($line[$#line]!~/STR/ or (length($line[3])>=4 or length($line[4])>=4) or
$FREQ>5) and $line[18]<0.01 and $line[19]<0.01 and $line[20]<0.01 and $line[23]<0.01 and $line[28]<0.01 and $line[32]<0.01){
my @hgvs=split(/,/,$line[9]);
my $hgvs=$hgvs[0];
if (my $transcript=&transcript($line[6])){
if(grep{/$transcript/}@hgvs){
$hgvs=(grep{/$transcript/}@hgvs)[0];
}
}
$line[9]=$hgvs;
$hgvs=~/(NM.*):exon\d+:(c.*?):(p\..*)/;
my($transcript,$Basechange,$protein)=($1,$2,$3);
my $COSMICID=(split(";",$line[11]))[0];
if ($COSMICID=~/\./){
$COSMICID="/";
}
my($dp,$vd,$vfr)=(split(":",$line[-2]))[2,4,-1];
$vfr=join(",",(split(",",$vfr))[2,3]);
my $tsg=(exists $tsg{$line[6]} and exists $og{$line[6]})?"TSG/OG":(exists $tsg{$line[6]})?'TSG':(exists $og{$line[6]})?'OG':'.';
my @anno=('1',@line[0,1,2,3,4,8],$transcript,$line[6],$Basechange,$protein,$freq,$dp,$vd,$vfr,$COSMICID,$line[10],$line[101],$tsg);
print OUT join("\t",@anno),"\n";
}
}
sub transcript{
my $gene=shift @_;
open TR,"/dataseq/jmdna/codes/reportbase/oncokbgene.txt";
my %oncogene;
while(<TR>){
chomp;
my @line=split;
$oncogene{$line[0]}=$line[2];
}
if (exists $oncogene{$gene}){
$oncogene{$gene}=~s/\.\d+//;
return $oncogene{$gene};
}else{
print "$gene has no NM id in oncokbgene.txt";
return "";
}
}