pipeline/script/targetTherapy.pl

214 lines
8.6 KiB
Perl
Raw Normal View History

2023-08-25 10:06:31 +08:00
#!/usr/bin/perl
use strict;
use warnings;
##Require exactly four positional arguments; otherwise abort with the usage line.
##$project is not read anywhere in the visible part of this script.
if(@ARGV!=4){
    die "useage:perl $0 name output_dir project cancer_type";
}
my $name=$ARGV[0];
my $output_dir=$ARGV[1];
my $project=$ARGV[2];
my $cancer_type=$ARGV[3];
##Load the SNV/indel annotation table into %mut.
##Layout: $mut{<column 0>}{<column 1>} = <column 2>. The classification loop
##looks it up as $mut{gene}{protein change} and appends the value to each
##output row as the "fun_change" column.
my $mut_file="/dataseq/jmdna/codes/reportbase/snv_indel_mutation.csv";
open(my $mut_fh,'<',$mut_file) or die "Cannot open $mut_file: $!\n";
<$mut_fh>; ##discard the header row
my %mut;
while(my $row=<$mut_fh>){
    ##Without chomp, a row with exactly three columns would keep its newline in
    ##the stored value and break the output rows it is appended to.
    chomp $row;
    ##Limit -1 keeps an empty third column instead of silently dropping it.
    my @field=split(/,/,$row,-1);
    ##Blank or truncated rows carry no usable annotation.
    next if @field<3;
    $mut{$field[0]}{$field[1]}=$field[2];
}
close($mut_fh);
##Load the targeted-therapy table into %therapy.
##Layout: $therapy{<column 0>}{<column 1>} = list of raw tab-separated rows.
##$h1 keeps the header row; it is reused later to build the POS output header.
my $therapy_file="/dataseq/jmdna/codes/reportbase/targetTherapy.txt";
open(my $therapy_fh,'<',$therapy_file) or die "Cannot open $therapy_file: $!\n";
my $h1=<$therapy_fh>;
$h1='' unless defined $h1; ##empty table: continue with an empty header
chomp $h1;
my %therapy;
while(my $row=<$therapy_fh>){
    chomp $row;
    my @field=split("\t",$row);
    ##Blank rows and rows without both key columns cannot be filed.
    next if @field<2;
    ##Missing filter columns compare as empty strings (same outcome as before,
    ##without "uninitialized" warnings).
    my ($disease,$source,$col9)=map{defined $_?$_:''}@field[2,5,9];
    ##A row is dropped when column 9 is 'D', column 5 is 'CSCO', or column 2
    ##matches Leukemia/Lymphoma/Myeloid (case-insensitive).
    push @{$therapy{$field[0]}{$field[1]}},$row if ($col9 ne 'D' and $source ne 'CSCO' and $disease !~/Leukemia|Lymphoma|Myeloid/i);
}
close($therapy_fh);
##Load the drug-name translation table into %drug.
##Column 0 may list several aliases separated by '|'; every lower-cased alias
##maps to the translation in column 1.
my $drug_file="/dataseq/jmdna/codes/reportbase/target_drug.txt";
open(my $drug_fh,'<',$drug_file) or die "Cannot open $drug_file: $!\n";
my %drug;
<$drug_fh>; ##discard the header row
while(my $row=<$drug_fh>){
    chomp $row;
    my ($aliases,$translation)=split(/\t/,$row);
    ##Rows with a missing/empty (or "0") translation are ignored.
    next unless $translation;
    foreach my $alias(split(/\|/,$aliases)){
        $drug{lc $alias}=$translation;
    }
}
close($drug_fh);
##Load the cancer-type table.
##%dis : lower-cased column 0 => column 1 (emitted later as the disease translation)
##%dis2: each value found in column 3 or column 4 => list of the column 0 names filed under it
my $dis_file="/dataseq/jmdna/codes/reportbase/cancer_type.txt";
open(my $dis_fh,'<',$dis_file) or die "Cannot open $dis_file: $!\n";
my (%dis,%dis2);
<$dis_fh>; ##discard the header row
while(my $row=<$dis_fh>){
    chomp $row;
    my @field=split(/\t/,$row);
    ##Blank rows have no disease name to register.
    next unless @field;
    $dis{lc $field[0]}=$field[1];
    ##Only file the disease under grouping columns that are actually present,
    ##instead of pushing it under an undefined key.
    foreach my $group(grep{defined}@field[3,4]){
        push @{$dis2{$group}},$field[0];
    }
}
close($dis_fh);
##Classify every variant of the sample into three buckets:
##  @pos - one row per therapy entry that matches the variant
##  @neg - variants whose %mut label is neutral, or benign-looking variants without a therapy match
##  @vus - everything else
##The POS/NEG/VUS handles are opened here and written by the output code after the loop.
my $in_path="$output_dir/mutation/${name}.target.mutation.txt";
my $pos_path="$output_dir/mutation/${name}.snvindel.pos.txt";
my $neg_path="$output_dir/mutation/${name}.snvindel.neg.txt";
my $vus_path="$output_dir/mutation/${name}.snvindel.vus.txt";
open(my $in_fh,'<',$in_path) or die "Cannot open $in_path: $!\n";
open(POS,'>',$pos_path) or die "Cannot write $pos_path: $!\n";
open(NEG,'>',$neg_path) or die "Cannot write $neg_path: $!\n";
open(VUS,'>',$vus_path) or die "Cannot write $vus_path: $!\n";
my $h2=<$in_fh>;
$h2='' unless defined $h2; ##empty input: continue with an empty header
chomp $h2;
my (@pos,@neg,@vus);
##Lower-cased disease names that %dis2 files under the requested cancer type.
##Built once because it does not change between variants.
my %patient_disease;
if(exists $dis2{$cancer_type}){
    $patient_disease{lc($_)}=1 foreach @{$dis2{$cancer_type}};
}
my $has_patient_diseases=(scalar keys %patient_disease)?1:0;
VARIANT: while(my $row=<$in_fh>){
    chomp $row;
    my @col=split("\t",$row);
    my $gene=$col[6];
    ##Extract exon number, cDNA change and protein change from column 9.
    ##In list context a failed match yields undef for all three, so values from
    ##an earlier match can never leak into this variant.
    my ($exon,$codon,$aa_change)=$col[9]=~/:exon(\d+):c\.(\S+):p\.(\S+)$/;
    foreach my $part($exon,$codon,$aa_change){
        $part='' unless defined $part;
    }
    ##Stop-gain / frameshift variants are all looked up under one shared key.
    my $protein;
    if($aa_change=~/\d+X$|\d+\*$/ or $col[8] eq 'stopgain' or $col[8] eq 'frameshift deletion' or $col[8] eq 'frameshift insertion'){
        $protein='Truncating Mutations';
    }else{
        $protein=$aa_change;
    }
    my $mut_type=($codon=~/del/)?("Exon $exon deletion"):($codon=~/ins/)?("Exon $exon insertion"):("Exon $exon mutation");
    ##Label appended to the row; '.' when the variant is not listed in %mut.
    my $fun_change='.';
    if(exists $mut{$gene}{$protein}){
        $fun_change=$mut{$gene}{$protein};
        if($fun_change=~/neutral/i){
            push @neg,"$row\t$fun_change";
            next VARIANT;
        }
        if($fun_change=~/Inconclusive/i){
            push @vus,"$row\t$fun_change";
            next VARIANT;
        }
        if(exists $therapy{$gene}){
            ##Therapy keys tried for this variant, in the original lookup order:
            ##exact protein change, gene-wide 'Mutation', residue without the new
            ##amino acid (e.g. V600), residue + 'X' wildcard, exon-level mutation type.
            my @therapy_keys=($protein,'Mutation');
            if($protein=~/^(\w\d+)\w$/){
                push @therapy_keys,$1,$1."X";
            }
            push @therapy_keys,$mut_type;
            my $hits=0;
            foreach my $key(@therapy_keys){
                next unless exists $therapy{$gene}{$key};
                ENTRY: foreach my $entry(@{$therapy{$gene}{$key}}){
                    my @rec=split("\t",$entry);
                    my $source=defined $rec[5]?$rec[5]:'';
                    my $disease=lc(defined $rec[2]?$rec[2]:'');
                    my $approved=($source eq "FDA" or $source eq "NCCN" or $source eq "NMPA")?1:0;
                    my $on_label=($disease eq "solid tumor" or $patient_disease{$disease})?1:0;
                    my $tag;
                    if($approved and $on_label){
                        $tag="适应症";
                    }elsif($approved and $has_patient_diseases){
                        ##NOTE(review): off-label rows are only emitted when %dis2 lists
                        ##at least one disease for the cancer type. This mirrors the
                        ##original condition exactly - confirm that it is intended.
                        $tag="非适应症";
                    }elsif($on_label){
                        $tag='.';
                    }else{
                        next ENTRY;
                    }
                    ##Undefined columns / translations are written as empty fields.
                    my @shown=map{defined $_?$_:''}@rec[0..9,14];
                    my $disease_cn=defined $dis{$disease}?$dis{$disease}:'';
                    push @pos,join("\t",$row,$fun_change,@shown,$tag,drug($rec[3]),$disease_cn);
                    $hits=1;
                }
            }
            next VARIANT if $hits;
        }
    }
    ##Reached when the variant is not in %mut, its gene has no therapy entries,
    ##or no therapy entry matched: column 15 mentioning "benign" (and none of the
    ##excluded terms or a '.') together with column 14 not being '.' sends the
    ##variant to @neg; anything else goes to @vus.
    if($col[15]=~/benign/i and $col[15]!~/sensitivity|pathogenic|uncertain|\./i and $col[14] ne '.'){
        push @neg,"$row\t$fun_change";
    }else{
        push @vus,"$row\t$fun_change";
    }
}
close($in_fh);
##Translate a comma-separated drug list using %drug.
##Each item is either a single drug or a combination written "A + B"; every
##component found in %drug (case-insensitive lookup) is replaced by its
##translation, unknown components are kept as they are.
##Returns the translated items joined with ",".
sub drug{
    my ($drug_list)=@_;
    my @translated_items;
    for my $item(split(/,/,$drug_list)){
        ##A combination is translated component by component and re-joined with " + ".
        my @components=($item=~/\+/)?split(/\s+\+\s+/,$item):($item);
        my @named;
        for my $component(@components){
            my $key=lc $component;
            push @named,(exists $drug{$key}?$drug{$key}:$component);
        }
        push @translated_items,join(" + ",@named);
    }
    return join(",",@translated_items);
}
##Write the three result tables. A file gets a header and rows only when its
##bucket is non-empty; otherwise it is left empty. Every handle is closed with
##an error check so that a failed (buffered) write does not pass unnoticed.
##POS header = input header + fun_change + therapy columns 0-9 and 14 + three extra columns.
my @therapy_header=map{defined $_?$_:''}(split("\t",$h1))[0..9,14];
my $h=$h2."\tfun_change\t".join("\t",@therapy_header)."\t标签\t药物中文名\t疾病中文名";
foreach my $table([\*POS,'pos',$h,\@pos],[\*NEG,'neg',$h2."\tfun_change",\@neg],[\*VUS,'vus',$h2."\tfun_change",\@vus]){
    my ($fh,$label,$header,$rows)=@$table;
    my $path="$output_dir/mutation/${name}.snvindel.$label.txt";
    if(@$rows){
        print {$fh} "$header\n",join("\n",@$rows),"\n" or die "Cannot write $path: $!\n";
    }
    close($fh) or die "Cannot finish writing $path: $!\n";
}