pipeline/script/hpd.pl

151 lines
4.5 KiB
Perl
Executable File

#!/usr/bin/perl
use strict;
use warnings;
# Expect exactly two arguments: the pipeline output directory and the tumor
# sample name, which is the file-name prefix of every input and output below.
die "usage:perl hpd.pl output_dir tumor" unless @ARGV==2;
my ($output_dir, $tumor) = @ARGV;

# ---- Inputs ---------------------------------------------------------------
# A missing input is reported once, with its path and the OS error, and the
# script carries on with the inputs that are available.  Reading from a handle
# that failed to open later yields no rows (Perl emits a warning for it).
# NOTE(review): if every input is mandatory for a valid report, turn these
# warnings into die.

# CNV segment table; its first line is a header and is discarded.
my $cnv_file = "$output_dir/cnvkit/${tumor}.rmdup.cns";
if (open(CNV, '<', $cnv_file)) {
    my $cnv_header = <CNV>;
}
else {
    warn "hpd.pl: cannot open $cnv_file: $!\n";
}

# Somatic mutation table; its header line is kept and reused as the header of
# the "pre" output table.
my $somatic_file = "$output_dir/mutation/${tumor}.snp.indel.Somatic.annoall.hg19_multianno_filtered.txt";
my $out2_head;
if (open(SOMATIC, '<', $somatic_file)) {
    $out2_head = <SOMATIC>;
}
else {
    warn "hpd.pl: cannot open $somatic_file: $!\n";
}

# Germline mutation table; its header line is discarded.
my $germline_file = "$output_dir/mutation/${tumor}.snp.indel.Germline.anno.hg19_multianno_filtered.txt";
if (open(GERM, '<', $germline_file)) {
    my $germline_header = <GERM>;
}
else {
    warn "hpd.pl: cannot open $germline_file: $!\n";
}

# Fusion calls; '#' header lines are skipped where the file is read.
my $fusion_file = "$output_dir/fusion/${tumor}.fusion.reanno.vcf";
open(FUSION, '<', $fusion_file)
    or warn "hpd.pl: cannot open $fusion_file: $!\n";

# ---- Outputs --------------------------------------------------------------
# Without the output files nothing useful can be produced, so failing to
# create them is fatal (for example when the HPD directory does not exist).
my $hpd_file = "$output_dir/HPD/${tumor}.hpd.txt";
open(OUT, '>', $hpd_file)
    or die "hpd.pl: cannot write $hpd_file: $!\n";
print OUT "Gene\tMut\tFreq\tFunc_change\n";

my $hpd_pre_file = "$output_dir/HPD/${tumor}.hpd.pre.txt";
open(OUT2, '>', $hpd_pre_file)
    or die "hpd.pl: cannot write $hpd_pre_file: $!\n";

# Rows collected for the "pre" table (written after all mutation tables have
# been scanned).
my @out2;

# ---- Gene panels ----------------------------------------------------------
# CNV rules are four parallel arrays: entry i of each array belongs to gene i.
#   @cnv_copy     copy-number threshold
#   @cnv_muttype  label written to the Mut column; it also selects the test:
#                 '扩增' (amplification) fires when copy number >= threshold,
#                 '缺失' (deletion) fires when copy number <= threshold
#   @cnv_function label written to the Func_change column
my @cnv          = ("MDM2", "MDM4", "EGFR", "B2M", "PTEN");
my @cnv_copy     = (8, 8, 8, 0, 0);
my @cnv_muttype  = ("扩增", "扩增", "扩增", "缺失", "缺失");
my @cnv_function = ("致病突变", "致病突变", "致病突变", "疑似致病突变", "致病突变");

# Genes reported from the germline table.
my @germline = (
    "ATM",   "ATR",   "BRCA1", "BRCA2", "CHEK1",  "CHEK2",
    "BAP1",  "ERCC4", "POLE",  "PALB2", "RAD51C", "RAD51D",
);

# Genes reported from the somatic table.
my @somatic = (
    "EGFR",   "JAK1",   "JAK2",   "B2M",    "CTNNB1", "AXIN1",  "APC",
    "PTEN",   "STK11",  "KEAP1",  "ATM",    "ATR",    "BRCA1",  "BRCA2",
    "CHEK1",  "CHEK2",  "BAP1",   "ERCC4",  "POLE",   "PALB2",  "RAD51C",
    "RAD51D", "ARID1A", "ARID1B", "ARID2",  "EPHA3",  "EPHA5",  "EPHA7",
    "NF1",    "POLD1",  "LRP1B",  "NOTCH1", "NOTCH2", "NOTCH3",
);

# Genes reported from the fusion calls.
my @fusion = ("ALK");
# Scan the CNV segment table and report every panel gene in @cnv whose
# estimated copy number crosses that gene's own threshold.
#
# Each rule is looked up by the gene's position in @cnv, so @cnv_copy,
# @cnv_muttype and @cnv_function are always read at the index belonging to the
# gene being tested.  (Previously the index was a counter that only advanced
# after a reported hit, so B2M and PTEN were tested against the first entry's
# amplification rule instead of their own deletion rule.)
#
# Field 3 is read as a comma-separated gene list and field 4 as a log2 ratio;
# presumably these are the CNVkit .cns "gene" and "log2" columns - confirm.
while (my $segment = <CNV>) {
    chomp $segment;
    my @line = split /\t/, $segment;
    next if @line < 5;    # need both the gene list and the ratio

    my %in_segment = map { $_ => 1 } split /,/, $line[3];

    for my $i (0 .. $#cnv) {
        my $gene = $cnv[$i];
        next unless $in_segment{$gene};

        # 2 ** (1 + ratio) == 2 * 2 ** ratio; adding 0.5 before int() rounds
        # the estimate to the nearest whole copy.
        my $cn = int(0.5 + 2**(1 + $line[4]));

        my $is_hit =
               ($cnv_muttype[$i] eq '扩增' && $cn >= $cnv_copy[$i])
            || ($cnv_muttype[$i] eq '缺失' && $cn <= $cnv_copy[$i]);
        next unless $is_hit;

        print OUT "$gene\t$cnv_muttype[$i]\t-\t$cnv_function[$i]\n";
    }
}
## OncoKB snv_indel clinical-significance definitions.
# %sig maps the significance name used in the knowledge-base CSV files to the
# label written to the report.
my %sig = (
    'Inconclusive'     => '意义未明突变',
    'Likely Neutral'   => '疑似良性突变',
    'Likely Oncogenic' => '疑似致病突变',
    'Oncogenic'        => '致病突变',
);

# %p_sig{first CSV field}{second CSV field} = report label.  It is queried via
# p_func() with a gene symbol and a protein-change key, so the first two CSV
# fields are presumably gene and alteration - confirm against the CSV.
# Only these three labels are kept; '疑似良性突变' entries are left out.
my %p_sig;
my %is_reportable_label = map { $_ => 1 } ("意义未明突变", "疑似致病突变", "致病突变");

my $snv_indel_csv = "/dataseq/jmdna/codes/reportbase/snv_indel_mutation.csv";
if (open(SNV_INDEL, '<', $snv_indel_csv)) {
    my $csv_header = <SNV_INDEL>;    # first line is a header
    while (my $row = <SNV_INDEL>) {
        # Strip LF or CRLF so a significance in the last column still matches
        # the keys of %sig.
        $row =~ s/\r?\n\z//;

        # NOTE(review): plain comma split - quoted fields containing commas
        # are not supported.
        my @line = split /,/, $row;
        next if @line < 3;

        my $Sig = $sig{ $line[2] };
        next unless defined $Sig;    # significance name not known to %sig
        next unless $is_reportable_label{$Sig};

        $p_sig{ $line[0] }{ $line[1] } = $Sig;
    }
    close SNV_INDEL;
}
else {
    # Without this table no somatic mutation can be classified; say so
    # explicitly instead of failing silently.
    warn "hpd.pl: cannot open $snv_indel_csv: $!\n";
}
# Scan the somatic mutation table.
#
# A row is considered only when its first field is numerically 1; that flag
# field is then dropped, so every index below refers to the remaining fields.
# For rows whose field 6 is a gene in @somatic and whose frequency is at
# least 2:
#   * the row is queued for the "pre" table (@out2), prefixed with "1";
#   * if field 9 is not '.', the protein change is looked up with p_func()
#     and the mutation is written to the report when a label is found.
# The frequency is the second-to-last ':'-separated item of the second-to-last
# field, with a '%' sign removed before the numeric comparison.
# NOTE(review): the field layout looks like an ANNOVAR multianno table
# (gene, exonic function, AA change) - confirm.
my %is_somatic_gene = map { $_ => 1 } @somatic;

while (my $row = <SOMATIC>) {
    # Remove the line ending so rows queued in @out2 end with exactly one
    # newline; the -1 limit keeps trailing empty fields so that negative
    # indexes still address the same fields as before.
    chomp $row;
    my @line = split /\t/, $row, -1;

    next unless @line and $line[0] == 1;
    shift @line;

    my $gene = $line[6];
    next unless defined $gene and $is_somatic_gene{$gene};

    my $freq = (split(":", $line[-2]))[-2];
    next unless defined $freq;
    my $FREQ = $freq =~ s/%//r;
    next if $FREQ < 2;

    if (defined $line[9] and $line[9] ne '.') {
        # Last ':'-separated item of the annotation, without its "p." prefix.
        my $p = (split(":", $line[9]))[-1];
        $p =~ s/p\.//;

        # Stop-gain / frameshift changes are looked up under one shared key.
        my $P = $p;
        if (   $p =~ /\d+X$|\d+\*$/
            or $line[8] eq 'stopgain'
            or $line[8] eq 'frameshift deletion'
            or $line[8] eq 'frameshift insertion')
        {
            $P = 'Truncating Mutations';
        }

        if (my $sig = p_func($gene, $P)) {
            print OUT "$gene\tp.$p\t$freq\t$sig\n";
        }
    }

    push @out2, "1\t" . join("\t", @line) . "\n";
}
# Scan the germline mutation table.
#
# A row is kept when field 7 (counted before the first field is dropped) is a
# gene in @germline and the first field is exactly 1, 2 or 3.  That first
# field selects the label written to the report:
#   1 -> '致病突变', 2 -> '疑似致病突变', 3 -> '意义未明突变'
# The match is anchored so that values merely containing one of those digits
# (for example "10" or "13") are no longer accepted.
# Kept rows are queued for the "pre" table (@out2) with "1" as first field;
# rows whose field 9 (after the drop) is not '.' are also written to the
# report.
my %is_germline_gene = map { $_ => 1 } @germline;

while (my $row = <GERM>) {
    chomp $row;
    my @line = split /\t/, $row;

    next unless defined $line[7] and $is_germline_gene{ $line[7] };

    my $level = $line[0];
    next unless $level =~ /\A[123]\z/;
    my $sig = $level eq '1' ? '致病突变'
            : $level eq '2' ? '疑似致病突变'
            :                 '意义未明突变';

    shift @line;

    if (defined $line[9] and $line[9] ne '.') {
        # Last ':'-separated item of the annotation (keeps its prefix).
        my $p = (split(":", $line[9]))[-1];

        # Second-to-last ':'-separated item of the last field.
        my $freq = (split(":", $line[-1]))[-2];
        $freq = '' unless defined $freq;

        print OUT "$line[6]\t$p\t$freq\t$sig\n";
    }

    push @out2, "1\t" . join("\t", @line) . "\n";
}
# Write the "pre" table only when at least one row was collected: first the
# header line captured from the somatic table, then every queued row.
print OUT2 $out2_head, @out2 if @out2;
## fusion
# Load the fusion knowledge base into
#   %fusion{second CSV field}{first CSV field} = third CSV field
# keeping only rows whose first field is a gene in @fusion.  The fusion scan
# looks entries up as $fusion{"<alteration> Fusion"}{<gene>} and translates
# the stored value through %sig, so the fields are presumably gene, alteration
# name and significance - confirm against the CSV.
my $fusion_csv = "/dataseq/jmdna/codes/reportbase/fusion.csv";
my %fusion;
if (open(FU, '<', $fusion_csv)) {
    my $csv_header = <FU>;    # first line is a header
    while (my $row = <FU>) {
        # Strip LF or CRLF: the third field is later used as a hash key, so a
        # stray carriage return would make that lookup fail.
        $row =~ s/\r?\n\z//;

        # NOTE(review): plain comma split - quoted fields containing commas
        # are not supported.
        my @line = split /,/, $row;
        next unless defined $line[0] and defined $line[1];
        next unless grep { $line[0] eq $_ } @fusion;

        $fusion{ $line[1] }{ $line[0] } = $line[2];
    }
    close FU;
}
else {
    # With no table every detected fusion falls back to the default label;
    # make the cause visible.
    warn "hpd.pl: cannot open $fusion_csv: $!\n";
}
# Scan the fusion calls and report fusions that involve a gene in @fusion.
#
# Fields 13 and 14 are the two partner genes and field 15 the fusion name
# (NOTE(review): inferred from how the fields are used - confirm against the
# re-annotated VCF layout).  Field 13 is checked first; the first partner
# found in @fusion becomes the reported gene.
#
# Label selection, unchanged from the previous two duplicated branches:
#   * "<name> Fusion" is in the knowledge base for the gene
#       -> its significance translated through %sig;
#   * otherwise, "<field14>-<field13> Fusion" is also absent
#       -> '疑似致病突变';
#   * otherwise (only the reversed name is known) -> nothing is written.
while (my $record = <FUSION>) {
    next if $record =~ /^#/;

    # Drop the line ending so that field 15 is clean even when it is the last
    # column; the -1 limit keeps trailing empty fields so the count is exact.
    chomp $record;
    my @line = split /\t/, $record, -1;
    next if @line < 16;

    my ($left, $right, $alt) = @line[13, 14, 15];

    my $gene1 = (grep { $_ eq $left } @fusion)  ? $left
              : (grep { $_ eq $right } @fusion) ? $right
              :                                   undef;
    next unless defined $gene1;

    my $forward = "$alt Fusion";
    my $revers  = join("", $right, "-", $left, " Fusion");

    if (exists $fusion{$forward}{$gene1}) {
        my $significance = $fusion{$forward}{$gene1};
        my $label = defined $significance ? $sig{$significance} : undef;
        if (not defined $label) {
            # Keep the row (with an empty label, as before) but say why.
            warn "hpd.pl: no report label for significance of $forward ($gene1)\n";
            $label = '';
        }
        print OUT "$gene1\t$alt 融合\t-\t", $label, "\n";
    }
    elsif (not exists $fusion{$revers}{$gene1}) {
        print OUT "$gene1\t$alt 融合\t-\t疑似致病突变\n";
    }
}
# Look up the report label stored in %p_sig for a gene / protein-change pair.
#   $gene - first-level key of %p_sig
#   $p    - second-level key of %p_sig
# Returns the stored label, or the empty string when the pair has no entry.
sub p_func{
    my ($gene, $p) = @_;
    return exists $p_sig{$gene}{$p} ? $p_sig{$gene}{$p} : "";
}