pipeline/script/hrr_controlsample_tissue.pl

126 lines
4.2 KiB
Perl
Executable File

#!/usr/bin/perl
# Collects variants that fall in a fixed HRR gene panel from one sample's
# germline and somatic annotation tables and writes two tab-separated
# reports under <outputDir>/HRR/.
#
# Arguments:
#   outputDir  directory containing mutation/ (inputs) and HRR/ (outputs)
#   tumor      sample name used as the file-name prefix
use strict;
use warnings;

die "usage:perl $0 outputDir tumor" unless @ARGV==2;
my ($outputDir,$tumor)=@ARGV;

# Two gene tiers. Further down, a pathogenic somatic hit in @hrr1 is reported
# as level I and one in @hrr2 as level II.
my @hrr1=("ATM","BARD1","BRCA1","BRCA2","BRIP1","CDK12","CHEK1","CHEK2","FANCL","PALB2","RAD51B","RAD51C","RAD51D","RAD54L");
my @hrr2=("PPP2R2A","ARID1A","ATR","ATRX","BAP1","BLM","FANCA","FANCC","FANCD2","FANCE","FANCF","FANCG","FANCI","MDC1","MRE11","NBN","PTEN","RAD50","RAD51","XRCC2");
# The full panel is exactly the union of the two tiers; it is only used for
# membership tests, so it is derived instead of being maintained by hand.
my @hrr3=(@hrr1,@hrr2);

my $germline_table="$outputDir/mutation/${tumor}.snp.indel.Germline.anno.hg19_multianno_filtered.txt";
my $hrr_report="$outputDir/HRR/${tumor}_hrr.txt";
my $hrr_pre_report="$outputDir/HRR/${tumor}.hrr.pre.txt";

# The handles stay as barewords because the code below reads IN1 and prints
# to OUT / HRRPRE by name. The input is opened first so that a missing table
# does not leave freshly truncated, empty reports behind.
open(IN1,'<',$germline_table) or die "cannot read $germline_table: $!\n";
open(OUT,'>',$hrr_report) or die "cannot write $hrr_report: $!\n";
open(HRRPRE,'>',$hrr_pre_report) or die "cannot write $hrr_pre_report: $!\n";

# Header of the pre-report: the germline table's header row with a leading
# "germline/somatic" origin column. An empty table yields just that column.
my $head=<IN1> // '';
chomp $head;
$head=join("\t",("胚系/体系",split("\t",$head)))."\n";

# @content collects rows for the final report, @hrrpre rows for the pre-report.
my (@content,@hrrpre);
# Germline pass over the rows remaining on IN1 (the header was consumed above).
# Every row whose gene is in the panel goes to the pre-report; rows classified
# 1/2/3 with a parsable frequency of at least 10% also go to the final report.
while(my $record=<IN1>){
    chomp $record;
    my @line=split("\t",$record);

    # Column 7 (counted before the classification column is shifted off)
    # holds the gene symbol.
    my $panel_hits=grep{$line[7] eq $_}@hrr3;
    next if !$panel_hits;
    push @hrrpre,join("\t","胚系",@line);

    # Column 0 is the classification code.
    # NOTE(review): the pattern is unanchored, so any value merely containing
    # 1, 2 or 3 passes and then falls into the last branch below - confirm the
    # column only ever holds a single digit.
    my $class=$line[0];
    next unless $class=~/1|2|3/;
    my $sig;
    if($class==1){
        $sig='致病突变';
    }
    elsif($class==2){
        $sig='疑似致病突变';
    }
    else{
        $sig='意义未明突变';
    }

    # Drop the classification column; indices below are relative to the rest.
    shift @line;
    next if $line[9] eq '.';

    # Transcript, coding change and protein change from the annotation string.
    # When the pattern does not match, all three stay undefined and are written
    # out as empty fields.
    my ($tr,$codon,$protein)=$line[9]=~/:(NM_\d+):exon\d+:(c\.\S+):(p\.\S+)$/;
    my $gene=$line[6];

    # Allele frequency: second-to-last ':' field of the second-to-last column,
    # with the percent sign removed for the numeric comparisons.
    my $freq=(split(":",$line[-2]))[-2];
    my $FREQ=$freq=~s/%//r;
    next if $FREQ<10;
    my $genotype;
    if($FREQ<=90){
        $genotype="杂合型";
    }
    else{
        $genotype="纯合型";
    }

    # Map the functional annotation to the report's mutation-type wording.
    my $effect=$line[8];
    my $muttype;
    if($effect=~/nonsynonymous SNV/){
        $muttype='错义突变';
    }
    elsif($effect=~/^frameshift/){
        $muttype='移码突变';
    }
    elsif($effect=~/^nonframeshift/){
        $muttype='非移码突变';
    }
    elsif($effect=~/stopgain/){
        $muttype='提前终止';
    }
    else{
        $muttype='未知';
    }

    push @content,join("\t",$gene,$tr,$codon,$protein,$muttype,$genotype,$sig);
}
## somatic
## OncoKB snv_indel table: translation of the label found in the third CSV
## column into the clinical-significance wording used in the report.
my %sig=(
    'Inconclusive'     => '意义未明突变',
    'Likely Neutral'   => '疑似良性突变',
    'Neutral'          => '良性突变',
    'Likely Oncogenic' => '疑似致病突变',
    'Oncogenic'        => '致病突变',
    'Resistance'       => '致病突变',
);

# $p_sig{first column}{second column} = report wording. The value is undef
# when the label has no entry in %sig. The somatic pass looks entries up by
# gene symbol and protein change.
my %p_sig;
my $snv_indel_csv="/dataseq/jmdna/codes/reportbase/snv_indel_mutation.csv";
open(my $snv_indel_fh,'<',$snv_indel_csv) or die "cannot read $snv_indel_csv: $!\n";
readline($snv_indel_fh);    # discard the header row
while(my $row=<$snv_indel_fh>){
    # Strip the line ending (LF or CRLF). Without this, a label sitting in the
    # last column keeps its newline and never matches a key of %sig.
    $row=~s/\r?\n\z//;
    # NOTE(review): plain comma split - assumes no quoted fields containing
    # commas; confirm against the CSV.
    my @line=split(",",$row);
    # Lines with fewer than three fields could only store an undef value,
    # which lookups treat the same as a missing entry.
    next if @line<3;
    # Every mapped label is stored, benign ones included; an older filter that
    # kept only the three non-benign categories was already disabled.
    $p_sig{$line[0]}{$line[1]}=$sig{$line[2]};
}
close($snv_indel_fh);
# Look up the entry stored in %p_sig for a gene / protein-change pair.
#   $gene  first-level key
#   $p     second-level key
# Returns the stored value when the entry exists (it may be undef if the
# table's label had no mapping), otherwise the empty string.
sub p_func{
    my $gene=shift;
    my $p=shift;
    return "" unless exists $p_sig{$gene}{$p};
    return $p_sig{$gene}{$p};
}
# Somatic pass: rows classified '1' whose gene is in the panel are graded
# (level I / II / III, or "IIII" for benign calls) and appended to the
# reports. Benign calls go to the pre-report only.
my $somatic_table="$outputDir/mutation/${tumor}.snp.indel.Somatic.annoall.hg19_multianno_filtered.txt";
open(my $somatic_fh,'<',$somatic_table) or die "cannot read $somatic_table: $!\n";
readline($somatic_fh);    # discard the header row
while(my $row=<$somatic_fh>){
    chomp $row;
    my @line=split("\t",$row);
    next if !defined $line[0] or $line[0] ne '1';
    next unless defined $line[7] and grep{$line[7] eq $_}@hrr3;

    # Drop the classification column; indices below are relative to the rest.
    shift @line;
    my $hgvs=$line[9] // '';
    next if $hgvs eq '.';

    my $gene=$line[6];
    my $effect=$line[8] // '';

    # Transcript, coding change and protein change. The match result is taken
    # directly instead of reading $1..$3 afterwards, so a non-matching
    # annotation gives blank fields explicitly, never leftover captures.
    my ($tr,$codon,$p)=$hgvs=~/:(NM_\d+):exon\d+:(c\.\S+):(p\.\S+)$/;
    $tr//='';
    $codon//='';
    $p//='';
    $p=~s/p\.//;

    # Key for the significance table: the protein change itself, or the
    # collective 'Truncating Mutations' entry for stop / frameshift variants.
    my $P=$p;
    if($p=~/\d+X$|\d+\*$/ or $effect eq 'stopgain' or $effect eq 'frameshift deletion' or $effect eq 'frameshift insertion'){
        $P='Truncating Mutations';
    }

    # Second-to-last ':' field of the second-to-last column, reported as is.
    my $freq=(split(":",$line[-2] // ''))[-2] // '';
    my $muttype=($effect=~/nonsynonymous SNV/)?'错义突变':($effect=~/^frameshift/)?'移码突变':($effect=~/^nonframeshift/)?'非移码突变':($effect=~/stopgain/)?'提前终止':'未知';

    # Grade the variant from a single table lookup.
    my $category=p_func($gene,$P);
    my $sig;
    if(!$category){
        # Not in the table, or the table label had no mapping.
        $sig="III级";
    }elsif($category eq "疑似良性突变" or $category eq "良性突变"){
        $sig="IIII级";
    }elsif($category eq "致病突变" or $category eq "疑似致病突变"){
        # Pathogenic calls are graded by gene tier. The wording is kept
        # unchanged should a gene ever be in neither tier.
        $sig=(grep{$gene eq $_}@hrr1)?"I级":(grep{$gene eq $_}@hrr2)?"II级":$category;
    }else{
        $sig="III级";
    }

    push @content,join("\t",($gene,$tr,$codon,"p.$p",$muttype,$freq,$sig)) if $sig ne "IIII级";
    push @hrrpre,join("\t",("体系",$sig,@line));
}
close($somatic_fh);
# Final report: header plus one row per reportable variant. Nothing is
# written when no variant qualified, and the last row is written without a
# trailing newline, as before.
if (@content){
    print OUT "gene\ttranscript\tc_change\tp_change\tmuttype\tfreq\tsig\n";
    print OUT join("\n",@content);
}
# Buffered write errors (full disk, handle that never opened) only surface
# at close, so both closes are checked instead of left to program exit.
close(OUT) or die "failed to write $outputDir/HRR/${tumor}_hrr.txt: $!\n";

# Pre-report: the origin-prefixed header followed by every panel row seen.
if (@hrrpre){
    print HRRPRE $head;
    print HRRPRE join("\n",@hrrpre)."\n";
}
close(HRRPRE) or die "failed to write $outputDir/HRR/${tumor}.hrr.pre.txt: $!\n";