#!/usr/bin/perl
# hpd.pl - build the per-sample HPD gene report.
#
# Usage: perl hpd.pl output_dir tumor
#
# Inputs (under output_dir, named after the tumor sample):
#   cnvkit/<tumor>.rmdup.cns
#   mutation/<tumor>.snp.indel.Somatic.annoall.hg19_multianno_filtered.txt
#   mutation/<tumor>.snp.indel.Germline.anno.hg19_multianno_filtered.txt
#   fusion/<tumor>.fusion.reanno.vcf
# Outputs:
#   HPD/<tumor>.hpd.txt      tab-separated Gene, Mut, Freq, Func_change rows,
#                            written in the order CNV, somatic, germline, fusion
#   HPD/<tumor>.hpd.pre.txt  first line of the somatic table followed by the
#                            selected annotation rows; left empty when no row
#                            was selected
#
# This file does not enable `use utf8`, so the Chinese literals below are
# handled as raw bytes and are written out unchanged.
#
# NOTE(review): the copy under review had every `<HANDLE>` read stripped
# (e.g. `while(){`).  The reads below were restored from the order of the
# `open` calls and from the columns each loop uses - confirm against the
# deployed script.
use strict;
use warnings;

die "usage:perl hpd.pl output_dir tumor" unless @ARGV==2;
my ($output_dir, $tumor) = @ARGV;

# Reference tables shared by all samples.
my $snv_indel_table = "/dataseq/jmdna/codes/reportbase/snv_indel_mutation.csv";
my $fusion_table    = "/dataseq/jmdna/codes/reportbase/fusion.csv";

# CNV rules: [gene, reported mutation type, copy-number threshold, significance].
# '扩增' rules fire when the copy number is >= the threshold, '缺失' rules when
# it is <= the threshold.
my @cnv_rules = (
    [ "MDM2", "扩增", 8, "致病突变" ],
    [ "MDM4", "扩增", 8, "致病突变" ],
    [ "EGFR", "扩增", 8, "致病突变" ],
    [ "B2M",  "缺失", 0, "疑似致病突变" ],
    [ "PTEN", "缺失", 0, "致病突变" ],
);

my @germline = ("ATM","ATR","BRCA1","BRCA2","CHEK1","CHEK2","BAP1","ERCC4","POLE","PALB2","RAD51C","RAD51D");
my @somatic = ("EGFR","JAK1","JAK2","B2M","CTNNB1","AXIN1","APC","PTEN","STK11","KEAP1","ATM","ATR","BRCA1","BRCA2","CHEK1","CHEK2","BAP1","ERCC4","POLE",
    "PALB2","RAD51C","RAD51D","ARID1A","ARID1B","ARID2","EPHA3","EPHA5","EPHA7","NF1","POLD1","LRP1B","NOTCH1","NOTCH2","NOTCH3");
my @fusion = ("ALK");

my %is_germline_gene = map { $_ => 1 } @germline;
my %is_somatic_gene  = map { $_ => 1 } @somatic;
my %is_fusion_gene   = map { $_ => 1 } @fusion;

# Germline tier (first column of the germline table) -> reported significance.
my %germline_sig_of = (
    1 => '致病突变',
    2 => '疑似致病突变',
    3 => '意义未明突变',
);

# OncoKB snv/indel clinical-significance labels.
my %sig = (
    'Inconclusive'     => '意义未明突变',
    'Likely Neutral'   => '疑似良性突变',
    'Likely Oncogenic' => '疑似致病突变',
    'Oncogenic'        => '致病突变',
);
# Only these labels are loaded from the snv/indel table.
my %is_reportable_sig = map { $_ => 1 } ('意义未明突变', '疑似致病突变', '致病突变');

my %p_sig;         # gene -> protein change -> significance label
my %fusion_sig;    # fusion name -> gene -> significance key of %sig

my $cnv_fh     = open_sample_input("$output_dir/cnvkit/${tumor}.rmdup.cns");
my $somatic_fh = open_sample_input("$output_dir/mutation/${tumor}.snp.indel.Somatic.annoall.hg19_multianno_filtered.txt");
my $germ_fh    = open_sample_input("$output_dir/mutation/${tumor}.snp.indel.Germline.anno.hg19_multianno_filtered.txt");
my $fusion_fh  = open_sample_input("$output_dir/fusion/${tumor}.fusion.reanno.vcf");

my $out_path = "$output_dir/HPD/${tumor}.hpd.txt";
open my $out_fh, '>', $out_path or die "cannot write $out_path: $!";
print {$out_fh} "Gene\tMut\tFreq\tFunc_change\n";

my $pre_path = "$output_dir/HPD/${tumor}.hpd.pre.txt";
open my $pre_fh, '>', $pre_path or die "cannot write $pre_path: $!";

my $out2_head;    # first line of the somatic table, reused as pre-file header
my @out2;         # pieces of the selected rows for the pre file

## CNV
if ($cnv_fh) {
    scalar readline($cnv_fh);    # skip the header line
    while (my $row = <$cnv_fh>) {
        my @line = split /\t/, $row;
        next if @line < 5;
        # Field 3 is split on ',' into gene names; field 4 is used as a log2
        # ratio (presumably the CNVkit .cns layout - TODO confirm).
        my %in_segment = map { $_ => 1 } split /,/, $line[3];
        for my $rule (@cnv_rules) {
            my ($gene, $muttype, $threshold, $function) = @$rule;
            next unless $in_segment{$gene};
            # Copy number = 2 * 2**log2, rounded to the nearest integer.
            my $cn = int(0.5 + 2**(1 + $line[4]));
            # Each gene is checked against its OWN rule.  The previous code
            # indexed the rule arrays with a counter that only advanced after
            # a printed hit, so the deletion rules were not applied.
            my $hit = $muttype eq '扩增' ? $cn >= $threshold
                    : $muttype eq '缺失' ? $cn <= $threshold
                    :                      0;
            print {$out_fh} "$gene\t$muttype\t-\t$function\n" if $hit;
        }
    }
    close $cnv_fh;
}

## snv/indel significance table: gene, protein change, OncoKB significance
open my $snv_fh, '<', $snv_indel_table or die "cannot read $snv_indel_table: $!";
scalar readline($snv_fh);    # skip the header line
while (my $row = <$snv_fh>) {
    chomp $row;    # so a significance value in the last column still matches
    my @line = split /,/, $row;
    next if @line < 3;
    my $label = $sig{ $line[2] };
    next unless defined $label && $is_reportable_sig{$label};
    $p_sig{ $line[0] }{ $line[1] } = $label;
}
close $snv_fh;

## somatic snv/indel
if ($somatic_fh) {
    $out2_head = <$somatic_fh>;
    while (my $row = <$somatic_fh>) {
        # NOTE(review): rows are intentionally left unchomped, as before, so
        # each somatic row pushed to the pre file is followed by an empty
        # line.  Kept because consumers of the pre file are not visible here.
        my @line = split /\t/, $row;
        next if $line[0] != 1;    # only rows whose first column is 1
        shift @line;
        next unless defined $line[6] && $is_somatic_gene{ $line[6] };

        # Second-to-last ':' field of the second-to-last column, e.g. "12.3%"
        # (presumably the allele frequency of a VCF-style sample column -
        # TODO confirm).
        my $freq = (split /:/, $line[-2])[-2];
        next unless defined $freq;
        my $freq_value = $freq =~ s/%//r;
        next if $freq_value < 2;

        if ($line[9] ne '.') {
            my $p = (split /:/, $line[9])[-1];
            $p =~ s/p\.//;
            # Stop/frameshift changes are looked up under one shared key.
            my $lookup = $p;
            if (   $p =~ /\d+X$|\d+\*$/
                or $line[8] eq 'stopgain'
                or $line[8] eq 'frameshift deletion'
                or $line[8] eq 'frameshift insertion')
            {
                $lookup = 'Truncating Mutations';
            }
            if (my $label = p_func($line[6], $lookup)) {
                print {$out_fh} "$line[6]\tp.$p\t$freq\t$label\n";
            }
        }
        push @out2, "1\t", join("\t", @line), "\n";
    }
    close $somatic_fh;
}

## germline snv/indel
if ($germ_fh) {
    scalar readline($germ_fh);    # skip the header line
    while (my $row = <$germ_fh>) {
        chomp $row;
        my @line = split /\t/, $row;
        next unless defined $line[7] && $is_germline_gene{ $line[7] };
        # Exact tier match: the former unanchored /1|2|3/ also accepted
        # values such as "10" or "13" and labelled them as tier 3.
        my $label = $germline_sig_of{ $line[0] };
        next unless defined $label;
        shift @line;
        if ($line[9] ne '.') {
            my $p    = (split /:/, $line[9])[-1];
            my $freq = (split /:/, $line[-1])[-2];
            print {$out_fh} "$line[6]\t$p\t", $freq // '', "\t$label\n";
        }
        # NOTE(review): the prefix is always "1", whatever the row's tier;
        # kept as in the original.
        push @out2, "1\t", join("\t", @line), "\n";
    }
    close $germ_fh;
}

if (@out2) {
    print {$pre_fh} $out2_head // '';
    print {$pre_fh} @out2;
}
close $pre_fh or die "cannot close $pre_path: $!";

## fusion significance table: gene, fusion name, OncoKB significance
open my $fu_fh, '<', $fusion_table or die "cannot read $fusion_table: $!";
scalar readline($fu_fh);    # skip the header line
while (my $row = <$fu_fh>) {
    chomp $row;
    my @line = split /,/, $row;
    next unless defined $line[0] && $is_fusion_gene{ $line[0] };
    $fusion_sig{ $line[1] }{ $line[0] } = $line[2];
}
close $fu_fh;

## fusion calls
if ($fusion_fh) {
    while (my $row = <$fusion_fh>) {
        next if $row =~ /^#/;
        chomp $row;
        # Limit -1 keeps trailing empty fields, so column indexes are the same
        # as without chomp.
        my @line = split /\t/, $row, -1;
        next if @line < 16;    # fields 13, 14 and 15 are required below
        my $revers = join("", $line[14], "-", $line[13], " Fusion");
        # Field 13 takes precedence over field 14 when both are listed genes.
        if ($is_fusion_gene{ $line[13] }) {
            report_fusion($out_fh, $line[13], $line[15], $revers);
        }
        elsif ($is_fusion_gene{ $line[14] }) {
            report_fusion($out_fh, $line[14], $line[15], $revers);
        }
    }
    close $fusion_fh;
}

close $out_fh or die "cannot close $out_path: $!";

# Open a per-sample input file for reading.
# Returns the handle, or nothing (after a warning) when the file cannot be
# opened.  The original script never checked these opens and simply produced
# no rows for a missing input; that continue-on-missing behaviour is kept,
# but it is now reported.
sub open_sample_input {
    my ($path) = @_;
    open(my $fh, '<', $path) or do {
        warn "hpd.pl: cannot open $path: $!; section skipped\n";
        return;
    };
    return $fh;
}

# True when the fusion table has an entry for ($name, $gene).  Both levels are
# tested so the lookup does not autovivify %fusion_sig.
sub fusion_listed {
    my ($name, $gene) = @_;
    return exists $fusion_sig{$name} && exists $fusion_sig{$name}{$gene};
}

# Write one fusion row for $gene1.
#   - "$alt Fusion" is in the fusion table: use the table's significance.
#   - otherwise, unless the table lists $revers for the gene: report the
#     fusion as '疑似致病突变'.
#   - otherwise: write nothing.
sub report_fusion {
    my ($fh, $gene1, $alt, $revers) = @_;
    my $name = "$alt Fusion";
    if (fusion_listed($name, $gene1)) {
        my $label = $sig{ $fusion_sig{$name}{$gene1} // '' };
        print {$fh} "$gene1\t$alt 融合\t-\t", $label // '', "\n";
    }
    elsif (!fusion_listed($revers, $gene1)) {
        print {$fh} "$gene1\t$alt 融合\t-\t疑似致病突变\n";
    }
    return;
}

# Look up the significance label loaded from the snv/indel table.
#   $gene - gene symbol
#   $p    - protein change without the "p." prefix, or 'Truncating Mutations'
# Returns the label, or "" when the table has no such entry.
sub p_func {
    my ($gene, $p) = @_[0,1];
    if (exists $p_sig{$gene} && exists $p_sig{$gene}{$p}) {
        return $p_sig{$gene}{$p};
    }
    return "";
}