pipeline/codes/indication.pl

107 lines
2.9 KiB
Perl
Executable File
Raw Permalink Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#!/usr/bin/perl
use strict;
use warnings;
die "useage:perl $0 output_dir cancer_type project" unless @ARGV == 3;
my ($output_dir, $cancer_type, $project) = @ARGV;
my $len = length($cancer_type);
if ($len < 4) {
die "cancer_type 非ST开头且4位长度";
}
else {
$cancer_type = substr($cancer_type, 0, 4);
}
my $database_path = defined $ENV{'DATABASE'} ? $ENV{'DATABASE'} : "/dataseq/jmdna/codes/reportbase";
print "Indication药物注释使用路径$database_path\n";
open OUT, ">$output_dir/mutation/indication.txt";
print OUT "基因\t检测内容\t检测情况\t肿瘤类型\n";
open DIS, "$database_path/oncotree.cancertype.20230801.txt";
<DIS>;
my (%dis, @id, %dis2);
while (<DIS>) {
chomp;
my @line = split(/\t/);
$dis{lc $line[2]} = $line[3];
push @{$dis2{$line[0]}}, lc $line[2];
push @id, $line[0];
}
foreach my $ID ($cancer_type) {
my @family;
my @ids = split("", $ID);
for (my $i = 1; $i < @ids; $i = $i + 2) {
push @family, join("", @ids[0 .. $i]);
}
push @family, (grep {/^$ID/} @id);
foreach my $t (@family) {
push @{$dis2{$ID}}, @{$dis2{$t}};
}
}
foreach my $key (keys(%dis2)) {
my %uniq;
@{$dis2{$key}} = grep {++$uniq{$_} < 2} @{$dis2{$key}};
}
##靶向用药信息
open THERAPY, "$database_path/targetTherapy.txt";
<THERAPY>;
my %therapy;
my %cancer;
while (<THERAPY>) {
chomp;
my @line = split("\t");
if ($line[9] eq 'V' and $line[14] eq 'A' and (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}}) and $line[0] !~ /,/) {
push @{$cancer{$line[0]}}, $dis{lc $line[2]} if !(grep {$_ eq $dis{lc $line[2]}} @{$cancer{$line[0]}});
if ($line[1] =~ /fusion/i) {
push @{$therapy{$line[0]}}, '融合' if !(grep {$_ eq '融合'} @{$therapy{$line[0]}});
}
elsif ($line[1] eq "Deletion" or $line[1] =~ /Amplification/) {
push @{$therapy{$line[0]}}, '扩增' if !(grep {$_ eq '扩增'} @{$therapy{$line[0]}});
}
else {
push @{$therapy{$line[0]}}, '突变' if !(grep {$_ eq '突变'} @{$therapy{$line[0]}});;
}
}
}
my $muts_ref = info();
my @muts = @$muts_ref;
for my $gene (sort keys %therapy) {
# 是否检测
if (grep {$_ eq $gene} @muts) {
print OUT "$gene\t", join("/", @{$therapy{$gene}}), "\t未检出变异\t", join("/", @{$cancer{$gene}}), "\n";
}
}
sub info {
open INFO, "$database_path/info.csv";
# 读取并解析表头
my $header = <INFO>;
chomp($header);
my @column_names = split(',', $header);
while (<INFO>) {
chomp;
my @line = split(/,/, $_);
# 将数据与表头对应
my %record;
@record{@column_names} = @line;
if ($record{'project'} eq $project) {
if ($record{'mutation'} ne "NA") {
@muts = split(/\//, $record{'mutation'});
}
}
}
return \@muts
}