pipeline/script/indication.pl

114 lines
3.1 KiB
Perl
Executable File
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#!/usr/bin/perl
use strict;
use warnings;
die "useage:perl $0 output_dir cancer_type project" unless @ARGV == 3;
my ($output_dir, $cancer_type, $project) = @ARGV;
my $database_path = defined $ENV{'DATABASE'} ? $ENV{'DATABASE'} : "/dataseq/jmdna/codes/reportbase";
print "Indication药物注释使用路径$database_path\n";
open OUT, ">$output_dir/mutation/indication.txt";
print OUT "基因\t检测内容\t检测情况\t肿瘤类型\n";
##本癌种FDA/NMPA/NCCN批准基因检测
=pod
##疾病翻译信息
open DIS,"/dataseq/jmdna/codes/reportbase/cancer_type.txt";
my (%dis,%dis2);
<DIS>;
while(<DIS>){
chomp;
my @line=split(/\t/);
$dis{lc$line[0]}=$line[1];
push @{$dis2{$line[3]}},$line[0];
push @{$dis2{$line[4]}},$line[0];
}
=cut
open DIS, "$database_path/oncotree.cancertype.20230801.txt";
<DIS>;
my (%dis, @id, %dis2);
while (<DIS>) {
chomp;
my @line = split(/\t/);
$dis{lc $line[2]} = $line[3];
# $dis{lc $line[4]} = $line[5];
push @{$dis2{$line[0]}}, lc $line[2];
# push @{$dis2{$line[0]}}, lc $line[4];
push @id, $line[0];
}
foreach my $ID ($cancer_type) {
my @family;
my @ids = split("", $ID);
for (my $i = 1; $i < @ids; $i = $i + 2) {
push @family, join("", @ids[0 .. $i]);
}
push @family, (grep {/^$ID/} @id);
foreach my $t (@family) {
push @{$dis2{$ID}}, @{$dis2{$t}};
}
}
foreach my $key (keys(%dis2)) {
my %uniq;
@{$dis2{$key}} = grep {++$uniq{$_} < 2} @{$dis2{$key}};
}
##靶向用药信息
open THERAPY, "$database_path/targetTherapy.txt";
<THERAPY>;
my %therapy;
my %cancer;
while (<THERAPY>) {
chomp;
my @line = split("\t");
if ($line[9] eq 'V' and $line[14] eq 'A' and (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}}) and $line[0] !~ /,/) {
push @{$cancer{$line[0]}}, $dis{lc $line[2]} if !(grep {$_ eq $dis{lc $line[2]}} @{$cancer{$line[0]}});
if ($line[1] =~ /fusion/i) {
push @{$therapy{$line[0]}}, '融合' if !(grep {$_ eq '融合'} @{$therapy{$line[0]}});
}
elsif ($line[1] eq "Deletion" or $line[1] =~ /Amplification/) {
push @{$therapy{$line[0]}}, '扩增' if !(grep {$_ eq '扩增'} @{$therapy{$line[0]}});
}
else {
push @{$therapy{$line[0]}}, '突变' if !(grep {$_ eq '突变'} @{$therapy{$line[0]}});;
}
}
}
my $muts_ref = info();
my @muts = @$muts_ref;
for my $gene (sort keys %therapy) {
# 是否检测
if (grep {$_ eq $gene} @muts) {
print OUT "$gene\t", join("/", @{$therapy{$gene}}), "\t未检出变异\t", join("/", @{$cancer{$gene}}), "\n";
}
}
sub info {
open INFO, "$database_path/info.csv";
# 读取并解析表头
my $header = <INFO>;
chomp($header);
my @column_names = split(',', $header);
while (<INFO>) {
chomp;
my @line = split(/,/, $_);
# 将数据与表头对应
my %record;
@record{@column_names} = @line;
if ($record{'project'} eq $project) {
if ($record{'mutation'} ne "NA") {
@muts = split(/\//, $record{'mutation'});
}
}
}
return \@muts
}