pipeline/script/hrr_controlsample.pl

79 lines
2.7 KiB
Perl
Raw Normal View History

2023-08-25 10:06:31 +08:00
#!/usr/bin/perl
# Build the HRR gene report for one sample from its filtered annotation tables.
#
# Usage: perl hrr_controlsample.pl outputDir tumor
#
# Reads (tab-separated, first line is a header):
#   <outputDir>/mutation/<tumor>.snp.indel.Germline.anno.hg19_multianno_filtered.txt
#   <outputDir>/mutation/<tumor>.snp.indel.Somatic.annoall.hg19_multianno_filtered.txt
# Writes:
#   <outputDir>/HRR/<tumor>_hrr.txt      one summary row per reported variant
#   <outputDir>/HRR/<tumor>.hrr.pre.txt  every germline line whose gene is in the panel
#
# An unwritable output file is fatal.  An unreadable input file is reported on
# STDERR and treated as containing no variants.
#
# `use utf8` is deliberately NOT enabled: the Chinese labels below are emitted
# as the raw bytes stored in this file, with no encoding layer on the handles.
use strict;
use warnings;

# Zero-based column positions in a split input line.
use constant {
    CLASS_COL        => 0,     # classification value in the first column
    GENE_COL         => 7,     # gene symbol, matched against the panel
    FUNC_COL         => 9,     # text such as "nonsynonymous SNV" / "frameshift ..."
    HGVS_COL         => 10,    # "...:NM_xxx:exonN:c.xxx:p.xxx", or "." when absent
    CLNSIG_COL       => 17,    # presumably a ClinVar-style significance string -- TODO confirm
    MIN_SOMATIC_FREQ => 3,     # somatic rows whose frequency is below this are dropped
};

# Genes that make up the panel.
my %is_hrr_gene = map { $_ => 1 } qw(
    ATM BARD1 BRCA1 BRCA2 BRIP1 CDK12 CHEK1 CHEK2 FANCL PALB2 PPP2R2A
    RAD51B RAD51C RAD51D RAD54L ARID1A ATR ATRX
    BAP1 BLM FANCA FANCC FANCD2 FANCE FANCF FANCG FANCI MDC1 MRE11 NBN
    PTEN RAD50 RAD51 XRCC2
);

# Map the germline class column to its report label.
# Returns nothing when the value contains none of the digits 1, 2 or 3.
sub germline_significance {
    my ($class) = @_;

    # NOTE(review): the pattern is unanchored, so values such as "10" or "13"
    # also pass and fall through to the last label.  Kept as-is; confirm the
    # set of values this column can take before tightening to /^[123]$/.
    return unless defined($class) && $class =~ /1|2|3/;

    no warnings 'numeric';    # the comparison below is intentionally numeric
    return $class == 1 ? '致病突变'
         : $class == 2 ? '疑似致病突变'
         :               '意义未明突变';
}

# Map the somatic significance column to its report label.
# Returns nothing when the variant must not be reported.
sub somatic_significance {
    my ($clnsig) = @_;

    # NOTE(review): a value that is exactly "Pathogenic" or "Likely_pathogenic"
    # matches none of these keywords and is therefore skipped.  Behaviour is
    # preserved here; confirm whether that is intended.
    return
        unless $clnsig
        =~ /Affects|association|Conflicting|sensitivity|drug|other|risk|protective|Uncertain|not_provided|\./i;

    return '疑似致病突变' if $clnsig =~ /Likely_pathogenic|drug/i;
    return '致病突变'
        if $clnsig =~ /pathogenic/i && $clnsig !~ /Conflicting/i;
    return '意义未明突变';
}

# Translate the variant-function text into the mutation-type label.
sub mutation_type {
    my ($func) = @_;
    return '错义突变'   if $func =~ /nonsynonymous SNV/;
    return '移码突变'   if $func =~ /^frameshift/;
    return '非移码突变' if $func =~ /^nonframeshift/;
    return '提前终止'   if $func =~ /stopgain/;
    return '未知';
}

# Split an hgvs annotation into (transcript, c. change, p. change).
# Returns three empty strings when the text does not have that shape, so the
# caller still emits a row with blank fields instead of reading unset captures.
sub parse_hgvs {
    my ($hgvs) = @_;

    # NOTE(review): \S+ is greedy and the pattern is anchored at the end, so if
    # the field ever holds several comma-separated transcripts the c. capture
    # spans them and the p. capture comes from the last one -- confirm the
    # upstream filter leaves a single transcript.
    if ( $hgvs =~ /:(NM_\d+):exon\d+:(c\.\S+):(p\.\S+)$/ ) {
        return ( $1, $2, $3 );
    }
    return ( '', '', '' );
}

# Return the second-to-last ':'-separated piece of a field, or '' when the
# field has fewer than two pieces.
sub second_last_field {
    my ($value) = @_;
    my @parts = split /:/, ( $value // '' );
    return @parts >= 2 ? $parts[-2] : '';
}

# True when a frequency such as "2.5%" is below MIN_SOMATIC_FREQ.
# Non-numeric text counts as 0, exactly as a plain numeric comparison would.
sub below_min_freq {
    my ($freq) = @_;
    my $pct = $freq =~ s/%//r;
    no warnings 'numeric';
    return $pct < MIN_SOMATIC_FREQ;
}

# Assemble one tab-separated report row from a split input line.
sub report_row {
    my ( $cols, $freq, $sig ) = @_;
    my ( $tr, $codon, $protein ) = parse_hgvs( $cols->[HGVS_COL] // '' );
    return join "\t",
        $cols->[GENE_COL], $tr, $codon, $protein, $freq,
        mutation_type( $cols->[FUNC_COL] // '' ), $sig;
}

die "usage:perl $0 outputDir tumor" unless @ARGV==2;
my ( $outputDir, $tumor ) = @ARGV;

my $report_path   = "$outputDir/HRR/${tumor}_hrr.txt";
my $pre_path      = "$outputDir/HRR/${tumor}.hrr.pre.txt";
my $germline_path = "$outputDir/mutation/${tumor}.snp.indel.Germline.anno.hg19_multianno_filtered.txt";
my $somatic_path  = "$outputDir/mutation/${tumor}.snp.indel.Somatic.annoall.hg19_multianno_filtered.txt";

# Both outputs are created (and truncated) up front, even if nothing ends up
# being written to them.
open my $report_fh, '>', $report_path
    or die "cannot write $report_path: $!\n";
open my $pre_fh, '>', $pre_path
    or die "cannot write $pre_path: $!\n";

my $head;       # germline header line, replayed at the top of the .pre file
my @content;    # report rows, germline first and then somatic
my @hrrpre;     # raw germline lines whose gene is in the panel

# --- Germline variants -------------------------------------------------------
if ( open my $germline_fh, '<', $germline_path ) {
    $head = <$germline_fh>;
    while ( my $row = <$germline_fh> ) {
        chomp $row;
        my @col = split /\t/, $row;
        next unless $is_hrr_gene{ $col[GENE_COL] // '' };

        # The .pre file keeps every panel-gene line, whatever its class.
        push @hrrpre, $row;

        my $sig = germline_significance( $col[CLASS_COL] ) or next;
        next if ( $col[HGVS_COL] // '' ) eq '.';

        # Germline frequency is taken from the last column.
        my $freq = second_last_field( $col[-1] );
        push @content, report_row( \@col, $freq, $sig );
    }
    close $germline_fh;
}
else {
    warn "cannot read $germline_path: $!\n";
}

# --- Somatic variants --------------------------------------------------------
if ( open my $somatic_fh, '<', $somatic_path ) {
    my $unused_header = <$somatic_fh>;
    while ( my $row = <$somatic_fh> ) {
        chomp $row;
        my @col = split /\t/, $row;
        next if ( $col[CLASS_COL] // '' ) ne '1';
        next unless $is_hrr_gene{ $col[GENE_COL] // '' };

        my $sig = somatic_significance( $col[CLNSIG_COL] // '' ) or next;
        next if ( $col[HGVS_COL] // '' ) eq '.';

        # Somatic frequency is taken from the second-to-last column.
        my $freq = second_last_field( $col[-2] );
        next if below_min_freq($freq);
        push @content, report_row( \@col, $freq, $sig );
    }
    close $somatic_fh;
}
else {
    warn "cannot read $somatic_path: $!\n";
}

# --- Output ------------------------------------------------------------------
if (@content) {
    print {$report_fh} "gene\ttranscript\tc_change\tp_change\tfreq\tmuttype\tsig\n";

    # The last row is written without a trailing newline; downstream readers
    # may depend on the exact bytes, so this is left unchanged.
    print {$report_fh} join( "\n", @content );
}
if (@hrrpre) {
    print {$pre_fh} $head;
    print {$pre_fh} join( "\n", @hrrpre ) . "\n";
}

# Buffered write errors only surface at close, so check it.
close $report_fh or die "error closing $report_path: $!\n";
close $pre_fh    or die "error closing $pre_path: $!\n";