151 lines
4.5 KiB
Perl
151 lines
4.5 KiB
Perl
|
|
#!/usr/bin/perl
|
||
|
|
use strict;
|
||
|
|
use warnings;
|
||
|
|
|
||
|
|
# Command-line arguments: the pipeline output directory and the tumor sample
# name that prefixes every input and output file name below.
die "usage:perl hpd.pl output_dir tumor" unless @ARGV==2;
my $output_dir=$ARGV[0];
my $tumor=$ARGV[1];

# Every open below uses the three-argument form and is checked, so that a
# missing input or output directory stops the run with a clear message
# instead of silently producing an empty or partial report.
# The bareword handle names are kept because the rest of the script reads
# from / prints to them.

# Segment-level copy-number table; its first line is discarded as a header.
my $cnv_path="$output_dir/cnvkit/${tumor}.rmdup.cns";
open(CNV,'<',$cnv_path) or die "Cannot open $cnv_path: $!";
<CNV>;

# Filtered somatic annotation table; the header line is kept because it is
# re-emitted at the top of the *.hpd.pre.txt output.
my $somatic_path="$output_dir/mutation/${tumor}.snp.indel.Somatic.annoall.hg19_multianno_filtered.txt";
open(SOMATIC,'<',$somatic_path) or die "Cannot open $somatic_path: $!";
my $out2_head=<SOMATIC>;

# Filtered germline annotation table; its first line is discarded as a header.
my $germ_path="$output_dir/mutation/${tumor}.snp.indel.Germline.anno.hg19_multianno_filtered.txt";
open(GERM,'<',$germ_path) or die "Cannot open $germ_path: $!";
<GERM>;

# Re-annotated fusion calls ('#' lines are skipped by the reader loop).
my $fusion_path="$output_dir/fusion/${tumor}.fusion.reanno.vcf";
open(FUSION,'<',$fusion_path) or die "Cannot open $fusion_path: $!";

# Main report: one row per reported alteration.
my $hpd_path="$output_dir/HPD/${tumor}.hpd.txt";
open(OUT,'>',$hpd_path) or die "Cannot write $hpd_path: $!";
print OUT "Gene\tMut\tFreq\tFunc_change\n";

# Companion table holding the full annotation rows of the selected variants.
my $hpd_pre_path="$output_dir/HPD/${tumor}.hpd.pre.txt";
open(OUT2,'>',$hpd_pre_path) or die "Cannot write $hpd_pre_path: $!";
|
||
|
|
# Rows collected for the *.hpd.pre.txt table (filled by the somatic and
# germline loops).
my @out2;

# Copy-number rules, stored as four parallel arrays: entry i of each array
# describes the same gene.
#   @cnv          gene symbol
#   @cnv_copy     copy-number threshold the segment is compared against
#   @cnv_muttype  reported alteration type (amplification / deletion label)
#   @cnv_function reported clinical-significance label
my @cnv          = qw(MDM2 MDM4 EGFR B2M PTEN);
my @cnv_copy     = (8, 8, 8, 0, 0);
my @cnv_muttype  = qw(扩增 扩增 扩增 缺失 缺失);
my @cnv_function = qw(致病突变 致病突变 致病突变 疑似致病突变 致病突变);

# Genes whose germline variants are reported.
my @germline = qw(
    ATM ATR BRCA1 BRCA2 CHEK1 CHEK2
    BAP1 ERCC4 POLE PALB2 RAD51C RAD51D
);

# Genes whose somatic SNVs/indels are reported.
my @somatic = qw(
    EGFR JAK1 JAK2 B2M CTNNB1 AXIN1 APC PTEN STK11 KEAP1
    ATM ATR BRCA1 BRCA2 CHEK1 CHEK2 BAP1 ERCC4 POLE
    PALB2 RAD51C RAD51D ARID1A ARID1B ARID2
    EPHA3 EPHA5 EPHA7 NF1 POLD1 LRP1B NOTCH1 NOTCH2 NOTCH3
);

# Genes whose fusions are reported.
my @fusion = qw(ALK);
|
||
|
|
|
||
|
|
|
||
|
|
# Report copy-number alterations of the genes listed in @cnv.
#
# For every segment row read from CNV, column 4 is split on ',' into gene
# symbols and column 5 is turned into an integer copy number with
# int(0.5 + 2**(1 + value)) -- i.e. column 5 is assumed to be a log2 ratio
# relative to two copies (TODO confirm against the .cns producer).
#
# Fix: the rule arrays @cnv_copy / @cnv_muttype / @cnv_function are parallel
# to @cnv, so they must be indexed by the gene's position in @cnv. The
# previous code indexed them with a counter that restarted at 0 on every row
# and only advanced after a print, so B2M and PTEN were checked against the
# first (amplification, >=8) rule and their deletions were never reported.
while (my $row = <CNV>) {
    chomp $row;
    my @line = split /\t/, $row;

    # Both the gene list (column 4) and the ratio (column 5) are required.
    next if @line < 5;

    my %in_segment = map { $_ => 1 } split /,/, $line[3];
    my $cn = int(0.5 + 2**(1 + $line[4]));

    for my $i (0 .. $#cnv) {
        my $gene = $cnv[$i];
        next unless $in_segment{$gene};

        my $type      = $cnv_muttype[$i];
        my $threshold = $cnv_copy[$i];

        # Amplification genes need at least the threshold copy number,
        # deletion genes at most the threshold.
        my $is_hit = ($type eq '扩增' && $cn >= $threshold)
                  || ($type eq '缺失' && $cn <= $threshold);
        next unless $is_hit;

        print OUT "$gene\t$type\t-\t$cnv_function[$i]\n";
    }
}
|
||
|
|
|
||
|
|
## OncoKB SNV/indel clinical-significance definitions
# Maps the significance label found in the knowledge-base CSV files to the
# label printed in the report.
my %sig = (
    'Inconclusive'     => '意义未明突变',
    'Likely Neutral'   => '疑似良性突变',
    'Likely Oncogenic' => '疑似致病突变',
    'Oncogenic'        => '致病突变',
);

# $p_sig{column 1}{column 2} = report label. Only rows whose label is one of
# the three non-benign classes are kept; p_func() looks entries up with
# (gene, alteration), so columns 1 and 2 are presumably gene and alteration.
my %p_sig;

my $snv_indel_csv = "/dataseq/jmdna/codes/reportbase/snv_indel_mutation.csv";
open(my $snv_indel_fh, '<', $snv_indel_csv)
    or die "Cannot open $snv_indel_csv: $!";
<$snv_indel_fh>;    # discard the header line

while (my $row = <$snv_indel_fh>) {
    # Strip the line ending (LF or CRLF). Without this, a significance value
    # sitting in the last column keeps its newline and never matches %sig.
    $row =~ s/\r?\n\z//;

    # NOTE(review): plain split on ',' -- assumes no quoted fields containing
    # commas in this CSV.
    my @line = split /,/, $row;
    next if @line < 3;

    # Labels that are not in %sig are skipped instead of being compared
    # while undefined.
    my $Sig = $sig{$line[2]};
    next unless defined $Sig;

    if ($Sig eq "意义未明突变" or $Sig eq "疑似致病突变" or $Sig eq "致病突变") {
        $p_sig{$line[0]}{$line[1]} = $Sig;
    }
}
close($snv_indel_fh);
|
||
|
|
|
||
|
|
|
||
|
|
# Report somatic SNVs/indels in the genes listed in @somatic.
#
# Only rows whose first column equals 1 are considered; that column is then
# dropped, so the indices below refer to the remaining columns:
#   [6] gene symbol, [8] exonic function, [9] protein-level annotation
#   ('.' when absent), [-2] ':'-separated sample field whose second-to-last
#   entry is the allele frequency (e.g. "12.5%").
# Variants with a frequency below 2 are ignored. Every remaining variant is
# copied to @out2; it is additionally printed to OUT when p_func() knows a
# significance label for it.
#
# Fixes:
#  * the line is chomped, so rows pushed to @out2 no longer end in two
#    newlines (which left a blank line after every somatic row);
#  * the gene test is a hash lookup, so `next` skips the row itself rather
#    than one iteration of an inner gene loop;
#  * a missing frequency field skips the row without warnings.
my %is_somatic_gene = map { $_ => 1 } @somatic;

while (my $row = <SOMATIC>) {
    chomp $row;
    my @line = split /\t/, $row;
    next unless @line;
    next if $line[0] != 1;
    shift @line;

    my $gene = $line[6];
    next unless defined $gene and $is_somatic_gene{$gene};

    my $freq = (split /:/, $line[-2])[-2];
    next unless defined $freq;
    my $FREQ = $freq =~ s/%//r;
    next if $FREQ < 2;

    if (defined $line[9] and $line[9] ne '.') {
        # Last ':'-separated item of the annotation, without the "p." prefix.
        my $p = (split /:/, $line[9])[-1];
        $p =~ s/p\.//;

        # Stop-gain / frameshift changes are looked up under one shared key.
        my $P = $p;
        if ($p =~ /\d+X$|\d+\*$/
            or $line[8] eq 'stopgain'
            or $line[8] eq 'frameshift deletion'
            or $line[8] eq 'frameshift insertion') {
            $P = 'Truncating Mutations';
        }

        if (my $label = p_func($gene, $P)) {
            print OUT "$gene\tp.$p\t$freq\t$label\n";
        }
    }

    push @out2, "1\t", join("\t", @line), "\n";
}
|
||
|
|
|
||
|
|
|
||
|
|
# Report germline variants in the genes listed in @germline.
#
# Column 1 is a class code: 1, 2 and 3 map to the three labels below, any
# other value is skipped. Column 8 (before the class column is dropped) is
# the gene symbol. After dropping column 1: [6] gene, [9] protein-level
# annotation ('.' when absent), [-1] ':'-separated sample field whose
# second-to-last entry is printed as the frequency.
#
# Fixes:
#  * the class test is anchored; the previous /1|2|3/ also accepted values
#    such as "13" or "21" and labelled them as class 3;
#  * the class is mapped through a lookup table instead of numeric == on
#    quoted strings;
#  * short rows and a missing frequency no longer raise "uninitialized"
#    warnings.
my %is_germline_gene = map { $_ => 1 } @germline;
my %germline_label   = (
    1 => '致病突变',
    2 => '疑似致病突变',
    3 => '意义未明突变',
);

while (my $row = <GERM>) {
    chomp $row;
    my @line = split /\t/, $row;
    next unless defined $line[7] and $is_germline_gene{$line[7]};
    next unless $line[0] =~ /\A\s*([123])\s*\z/;
    my $label = $germline_label{$1};
    shift @line;

    if (defined $line[9] and $line[9] ne '.') {
        my $p    = (split /:/, $line[9])[-1];
        my $freq = (split /:/, $line[-1])[-2];
        $freq = '' unless defined $freq;
        print OUT "$line[6]\t$p\t$freq\t$label\n";
    }

    # NOTE(review): the row is written with a leading "1" whatever its
    # original class (1, 2 or 3) was -- confirm this is intended.
    push @out2, "1\t", join("\t", @line), "\n";
}
|
||
|
|
|
||
|
|
# Write the companion table: the saved somatic header line followed by every
# collected row. Nothing is written when no variant was selected.
if (@out2) {
    # The header is undefined when the somatic table was empty; print
    # nothing for it rather than an undefined value.
    print OUT2 $out2_head if defined $out2_head;
    print OUT2 @out2;
}

# Buffered write errors only surface at close, so check it. OUT2 is not used
# after this point.
close(OUT2) or warn "Cannot close hpd.pre.txt output: $!";
|
||
|
|
|
||
|
|
## Fusion knowledge base
# Loads fusion.csv into $fusion{column 2}{column 1} = column 3, keeping only
# rows whose first column is one of the genes in @fusion. The fusion loop
# looks entries up as $fusion{"<name> Fusion"}{gene} and feeds the stored
# value to %sig, so the columns are presumably gene, alteration name and
# significance label.
my $fusion_csv = "/dataseq/jmdna/codes/reportbase/fusion.csv";
open(my $fusion_kb_fh, '<', $fusion_csv)
    or die "Cannot open $fusion_csv: $!";
<$fusion_kb_fh>;    # discard the header line

my %fusion;
my %kb_gene_wanted = map { $_ => 1 } @fusion;

while (my $row = <$fusion_kb_fh>) {
    # Strip LF or CRLF so the last column never carries a stray "\r".
    $row =~ s/\r?\n\z//;

    # NOTE(review): plain split on ',' -- assumes no quoted fields containing
    # commas in this CSV.
    my @line = split /,/, $row;

    # Gene and alteration name are required; the label may be empty.
    next if @line < 2;
    next unless $kb_gene_wanted{$line[0]};

    $fusion{$line[1]}{$line[0]} = $line[2];
}
close($fusion_kb_fh);
|
||
|
|
|
||
|
|
# Report fusions involving a gene listed in @fusion.
#
# For each non-comment row, columns 14 and 15 are the two partner genes and
# column 16 is the fusion name. The reported gene is column 14 if it is in
# @fusion, otherwise column 15 if that one is. The significance is then
# resolved as follows:
#   * "<name> Fusion" is in %fusion for that gene -> the table's label,
#     translated through %sig;
#   * otherwise, unless "<col15>-<col14> Fusion" is in the table for that
#     gene, the row is reported with the default label;
#   * otherwise nothing is printed.
#
# Changes: the two previously duplicated branches are merged; the line is
# chomped so a trailing newline can never end up inside the fusion name;
# table lookups no longer autovivify keys in %fusion; OUT is closed and
# checked once the last section has been written.
my %fusion_gene_wanted = map { $_ => 1 } @fusion;

while (my $row = <FUSION>) {
    next if $row =~ /^#/;
    chomp $row;
    my @line = split /\t/, $row;

    my ($left, $right, $alt) = map { defined $_ ? $_ : '' } @line[13, 14, 15];

    my $gene1 = $fusion_gene_wanted{$left}  ? $left
              : $fusion_gene_wanted{$right} ? $right
              :                               undef;
    next unless defined $gene1;

    my $forward = "$alt Fusion";
    my $revers  = join("", $right, "-", $left, " Fusion");

    if (exists $fusion{$forward} and exists $fusion{$forward}{$gene1}) {
        my $kb_label = $fusion{$forward}{$gene1};
        my $label    = defined $kb_label ? $sig{$kb_label} : undef;
        $label = '' unless defined $label;
        print OUT "$gene1\t$alt 融合\t-\t", $label, "\n";
    }
    elsif (not(exists $fusion{$revers} and exists $fusion{$revers}{$gene1})) {
        print OUT "$gene1\t$alt 融合\t-\t疑似致病突变\n";
    }
}
close(FUSION);

# Last writer of the main report: surface any buffered write error.
close(OUT) or die "Cannot close hpd.txt output: $!";
|
||
|
|
|
||
|
|
# p_func(GENE, ALTERATION)
# Looks up the significance label stored in %p_sig for the given gene and
# alteration key.
# Returns the stored label, or the empty string when the pair is not in the
# table.
sub p_func{
    my ($gene, $alteration) = @_;
    return (exists $p_sig{$gene}{$alteration})
        ? $p_sig{$gene}{$alteration}
        : "";
}
|