pipeline/script/indication.pl

83 lines
2.3 KiB
Perl
Raw Normal View History

2023-11-29 15:13:30 +08:00
#!/usr/bin/perl
use strict;
use warnings;
# Command line: <output_dir> <cancer_type>.
# Validate the argument count before unpacking so a wrong call stops with a
# plain usage message.
die "usage: perl $0 output_dir cancer_type\n" unless @ARGV == 2;
my ($output_dir, $cancer_type) = @ARGV;

# Directory holding the annotation tables; the DATABASE environment variable
# overrides the built-in default.
my $database_path = defined $ENV{'DATABASE'} ? $ENV{'DATABASE'} : "/dataseq/jmdna/codes/reportbase";
print "Indication药物注释使用路径$database_path\n";

# The report is written through the bareword handle OUT because the rest of
# the script prints to OUT. Three-argument open keeps the path from being
# interpreted as a mode, and the check stops the run instead of silently
# producing no report when the mutation/ directory is missing or unwritable.
my $indication_file = "$output_dir/mutation/indication.txt";
open OUT, '>', $indication_file
    or die "Cannot open $indication_file for writing: $!\n";
# Header columns: gene, tested alteration types, detection status, tumour types.
print OUT "基因\t检测内容\t检测情况\t肿瘤类型\n";
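## Genes with FDA/NMPA/NCCN-approved testing for this cancer type
=pod
## Disease translation information (legacy loader, disabled by this POD block)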
open DIS,"/dataseq/jmdna/codes/reportbase/cancer_type.txt";
my (%dis,%dis2);
<DIS>;
while(<DIS>){
chomp;
my @line=split(/\t/);
$dis{lc$line[0]}=$line[1];
push @{$dis2{$line[3]}},$line[0];
push @{$dis2{$line[4]}},$line[0];
}
=cut
# Load the OncoTree cancer-type table (tab-separated, first line is a header).
# Builds three structures used by the rest of the script:
#   %dis  - lower-cased column 2 => column 3, and lower-cased column 4 => column 5
#           (presumably tumour name => display translation; confirm against the table)
#   %dis2 - column 0 (type ID) => list of the lower-cased names from columns 2 and 4
#   @id   - every column-0 ID, in file order
# NOTE(review): this path is hard-coded and does not follow $database_path /
# the DATABASE environment variable; confirm whether that is intended.
my $oncotree_file = "/dataseq/jmdna/codes/reportbase/oncotree.cancertype.20230801.txt";
open my $oncotree_fh, '<', $oncotree_file
    or die "Cannot open $oncotree_file: $!\n";
my $oncotree_header = <$oncotree_fh>;    # the header line carries no data
my (%dis,@id,%dis2);
while (my $row = <$oncotree_fh>) {
    chomp $row;
    # A blank line would otherwise register an undefined ID and empty names.
    next if $row eq '';
    my @line = split /\t/, $row;
    $dis{ lc $line[2] } = $line[3];
    $dis{ lc $line[4] } = $line[5];
    push @{ $dis2{ $line[0] } }, lc $line[2], lc $line[4];
    push @id, $line[0];
}
close $oncotree_fh;
# Extend the name list of the requested cancer type with the names of its
# whole family:
#   * every prefix of the ID with an even length (2, 4, 6, ... characters),
#   * every ID in @id that starts with the requested ID.
# Presumably IDs encode one hierarchy level per two characters, making the
# prefixes ancestors and the prefix matches descendants - confirm against the
# table.
{
    my @family;
    for (my $len = 2; $len <= length $cancer_type; $len += 2) {
        push @family, substr($cancer_type, 0, $len);
    }
    # \Q...\E: the ID comes from the command line and must match literally.
    push @family, grep { /^\Q$cancer_type\E/ } @id;

    for my $member (@family) {
        # A prefix that has no row in the table has no name list; dereferencing
        # it here would abort the script under "use strict".
        next unless defined $dis2{$member};
        # Appending the list to itself only creates duplicates that the
        # de-duplication below would remove again.
        next if $member eq $cancer_type;
        push @{ $dis2{$cancer_type} }, @{ $dis2{$member} };
    }
}

# Remove repeated names from every list, keeping the first occurrence.
foreach my $key (keys %dis2) {
    my %seen;
    @{ $dis2{$key} } = grep { !$seen{$_}++ } @{ $dis2{$key} };
}
## Targeted-therapy information
# Read the targeted-therapy table (tab-separated, first line is a header) and
# collect, per gene (column 0), the alteration categories derived from
# column 1 and the tumour labels looked up in %dis for column 2. A row is used
# only when column 9 is 'V', column 14 is 'A', its tumour name belongs to the
# requested cancer type, and the gene field contains no comma. One report line
# per gene is then written to OUT, sorted by gene.
my $therapy_file = "$database_path/targetTherapy.txt";
open my $therapy_fh, '<', $therapy_file
    or die "Cannot open $therapy_file: $!\n";
my $therapy_header = <$therapy_fh>;    # the header line carries no data
my %therapy;    # gene => alteration category labels, in first-seen order
my %cancer;     # gene => tumour labels from %dis, in first-seen order

# Tumour names accepted for this run, built once instead of scanning the list
# for every row.
my %wanted_tumour = map { lc($_) => 1 } @{ $dis2{$cancer_type} || [] };
my (%seen_tumour, %seen_category);

while (my $row = <$therapy_fh>) {
    chomp $row;
    my @line = split /\t/, $row;
    # Column 14 is the highest index read; a shorter row cannot satisfy the
    # filter and would only trigger uninitialized-value warnings.
    next if @line < 15;
    next unless $line[9] eq 'V' and $line[14] eq 'A';
    next unless $wanted_tumour{ lc $line[2] };
    next if $line[0] =~ /,/;

    my $gene = $line[0];

    # Record the tumour label once per gene. An undefined label is compared
    # as the empty string for the purpose of de-duplication.
    my $tumour     = $dis{ lc $line[2] };
    my $tumour_key = defined $tumour ? $tumour : '';
    push @{ $cancer{$gene} }, $tumour unless $seen_tumour{$gene}{$tumour_key}++;

    # Classify the alteration.
    # NOTE(review): "Deletion" is reported with the same label as
    # amplification ('扩增'); confirm this is the intended report wording.
    my $category;
    if ($line[1] =~ /fusion/i) {
        $category = '融合';
    }
    elsif ($line[1] eq "Deletion" or $line[1] =~ /Amplification/) {
        $category = '扩增';
    }
    else {
        $category = '突变';
    }
    push @{ $therapy{$gene} }, $category unless $seen_category{$gene}{$category}++;
}
close $therapy_fh;

for my $gene (sort keys %therapy) {
    print OUT "$gene\t",join("/",@{$therapy{$gene}}),"\t未检出变异\t",join("/",@{$cancer{$gene}}),"\n";
}