pipeline/script/indication.pl

85 lines
2.4 KiB
Perl
Raw Normal View History

2023-11-29 15:13:30 +08:00
#!/usr/bin/perl
use strict;
use warnings;
2023-11-30 15:31:35 +08:00
# Command line: <output_dir> <cancer_type>.
# Exactly two arguments are required; anything else aborts with the usage line.
die "useage:perl $0 output_dir cancer_type" unless @ARGV == 2;
my ($output_dir, $cancer_type) = @ARGV;

# Directory holding the reference tables. The DATABASE environment variable
# overrides the built-in default location.
my $database_path = defined $ENV{'DATABASE'} ? $ENV{'DATABASE'} : "/dataseq/jmdna/codes/reportbase";
print "Indication药物注释使用路径$database_path\n";

# Open the report file (three-arg form, checked) and emit its header row.
# The OUT handle stays open; the rest of the script writes its rows to it.
my $indication_file = "$output_dir/mutation/indication.txt";
open(OUT, '>', $indication_file) or die "Cannot open $indication_file for writing: $!";
print OUT "基因\t检测内容\t检测情况\t肿瘤类型\n";
##本癌种FDA/NMPA/NCCN批准基因检测
=pod
##疾病翻译信息
open DIS,"/dataseq/jmdna/codes/reportbase/cancer_type.txt";
my (%dis,%dis2);
<DIS>;
while(<DIS>){
chomp;
my @line=split(/\t/);
$dis{lc$line[0]}=$line[1];
push @{$dis2{$line[3]}},$line[0];
push @{$dis2{$line[4]}},$line[0];
}
=cut
2023-11-30 15:31:35 +08:00
# Load the cancer-type table (tab separated, first line is a header).
#   %dis  : lower-cased column 2  => column 3
#           (presumably English name => display name; confirm against the table)
#   %dis2 : column 0 (type id)    => list of lower-cased column-2 names
#   @id   : every column-0 value, in file order
# Columns 4/5 were once mapped the same way; that mapping is currently disabled.
my $oncotree_file = "$database_path/oncotree.cancertype.20230801.txt";
open(my $dis_fh, '<', $oncotree_file) or die "Cannot open $oncotree_file: $!";
<$dis_fh>;    # discard the header line
my (%dis, @id, %dis2);
while (my $row = <$dis_fh>) {
    chomp $row;
    next if $row eq '';    # a blank line would otherwise create entries under an empty key
    my @line = split(/\t/, $row);
    my $name = lc $line[2];
    $dis{$name} = $line[3];
    push @{$dis2{$line[0]}}, $name;
    push @id, $line[0];
}
close $dis_fh;
2023-11-30 15:31:35 +08:00
# Widen the name list of the requested cancer type with the names of related ids:
#   * every leading prefix of the id whose length is a multiple of two
#     (presumably the ancestors in a two-characters-per-level code; TODO confirm)
#   * every known id that starts with the requested id
{
    my @family;
    for (my $len = 2; $len <= length $cancer_type; $len += 2) {
        push @family, substr($cancer_type, 0, $len);
    }
    # \Q...\E: the id comes from the command line and must be matched literally.
    push @family, grep { /^\Q$cancer_type\E/ } @id;

    # The requested id itself adds nothing new, so it is marked as visited up front.
    my %visited = ($cancer_type => 1);
    for my $relative (@family) {
        next if $visited{$relative}++;
        # A prefix need not exist in the table; dereferencing a missing entry
        # in rvalue context is fatal under strict refs, so skip it.
        next unless exists $dis2{$relative};
        push @{$dis2{$cancer_type}}, @{$dis2{$relative}};
    }
}
2023-11-30 15:31:35 +08:00
# Remove repeated names from every list in %dis2, keeping the first
# occurrence of each name and the original order.
for my $type_id (keys %dis2) {
    my %seen;
    my @distinct;
    for my $name (@{$dis2{$type_id}}) {
        next if $seen{$name}++;
        push @distinct, $name;
    }
    @{$dis2{$type_id}} = @distinct;
}
##靶向用药信息
2023-11-30 15:31:35 +08:00
# Read the targeted-therapy table (tab separated, first line is a header) and
# collect, per gene (column 0):
#   %therapy : distinct alteration categories, in first-seen order
#   %cancer  : distinct display names of the matching cancer types, in first-seen order
# A row is used only when column 9 is 'V', column 14 is 'A' (flag meanings are not
# visible here; confirm against the table), column 2 names a cancer type in the
# requested family, and column 0 contains no comma.
my $therapy_file = "$database_path/targetTherapy.txt";
open(my $therapy_fh, '<', $therapy_file) or die "Cannot open $therapy_file: $!";
<$therapy_fh>;    # discard the header line
my %therapy;
my %cancer;
{
    # Hash lookups replace the repeated linear scans over the name lists.
    my %in_family = map { (lc($_) => 1) } @{ $dis2{$cancer_type} || [] };
    my (%seen_cancer, %seen_kind);

    while (my $row = <$therapy_fh>) {
        chomp $row;
        my @line = split(/\t/, $row);
        # Column 14 must be present for the row to qualify; this also avoids
        # "uninitialized" warnings on short rows.
        next if @line < 15;
        next unless $line[9] eq 'V' and $line[14] eq 'A';

        my ($gene, $alteration, $disease) = @line[0, 1, 2];
        next unless $in_family{lc $disease};
        next if $gene =~ /,/;

        my $display = $dis{lc $disease};
        push @{$cancer{$gene}}, $display unless $seen_cancer{$gene}{$display}++;

        # NOTE(review): "Deletion" is reported under the same label as
        # amplification; confirm this is intended.
        my $kind = $alteration =~ /fusion/i                                       ? '融合'
                 : ($alteration eq "Deletion" || $alteration =~ /Amplification/) ? '扩增'
                 :                                                                 '突变';
        push @{$therapy{$gene}}, $kind unless $seen_kind{$gene}{$kind}++;
    }
}
close $therapy_fh;
2023-11-30 15:31:35 +08:00
# Write one row per gene, in sorted gene order: gene, "/"-joined alteration
# categories, a fixed status string, and the "/"-joined cancer-type names.
foreach my $gene_name (sort keys %therapy) {
    my $kinds   = join("/", @{$therapy{$gene_name}});
    my $tumours = join("/", @{$cancer{$gene_name}});
    print OUT $gene_name . "\t" . $kinds . "\t未检出变异\t" . $tumours . "\n";
}