pipeline/codes/indication.pl

#!/usr/bin/perl
use strict;
use warnings;

die "useage:perl $0 output_dir cancer_type project" unless @ARGV == 3;
my ($output_dir, $cancer_type, $project) = @ARGV;

my $len = length($cancer_type);
if ($len < 4) {
    die "cancer_type 非ST开头且4位长度";
}
else {
    $cancer_type = substr($cancer_type, 0, 4);
}

my $database_path = defined $ENV{'DATABASE'} ? $ENV{'DATABASE'} : "/dataseq/jmdna/codes/reportbase";

print "Indication药物注释使用路径：$database_path\n";

open OUT, ">$output_dir/mutation/indication.txt";
print OUT "基因\t检测内容\t检测情况\t肿瘤类型\n";

open DIS, "$database_path/oncotree.cancertype.20230801.txt";
<DIS>;
my (%dis, @id, %dis2);
while (<DIS>) {
    chomp;
    my @line = split(/\t/);
    $dis{lc $line[2]} = $line[3];
    push @{$dis2{$line[0]}}, lc $line[2];
    push @id, $line[0];
}

foreach my $ID ($cancer_type) {
    my @family;
    my @ids = split("", $ID);
    for (my $i = 1; $i < @ids; $i = $i + 2) {
        push @family, join("", @ids[0 .. $i]);
    }
    push @family, (grep {/^$ID/} @id);
    foreach my $t (@family) {
        push @{$dis2{$ID}}, @{$dis2{$t}};
    }
}
foreach my $key (keys(%dis2)) {
    my %uniq;
    @{$dis2{$key}} = grep {++$uniq{$_} < 2} @{$dis2{$key}};
}

##靶向用药信息
open THERAPY, "$database_path/targetTherapy.txt";
<THERAPY>;
my %therapy;
my %cancer;
while (<THERAPY>) {
    chomp;
    my @line = split("\t");
    if ($line[9] eq 'V' and $line[14] eq 'A' and (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}}) and $line[0] !~ /,/) {
        push @{$cancer{$line[0]}}, $dis{lc $line[2]} if !(grep {$_ eq $dis{lc $line[2]}} @{$cancer{$line[0]}});
        if ($line[1] =~ /fusion/i) {
            push @{$therapy{$line[0]}}, '融合' if !(grep {$_ eq '融合'} @{$therapy{$line[0]}});
        }
        elsif ($line[1] eq "Deletion" or $line[1] =~ /Amplification/) {
            push @{$therapy{$line[0]}}, '扩增' if !(grep {$_ eq '扩增'} @{$therapy{$line[0]}});
        }
        else {
            push @{$therapy{$line[0]}}, '突变' if !(grep {$_ eq '突变'} @{$therapy{$line[0]}});;
        }
    }
}

my $muts_ref = info();

my @muts = @$muts_ref;

for my $gene (sort keys %therapy) {
    # 是否检测
    if (grep {$_ eq $gene} @muts) {
        print OUT "$gene\t", join("/", @{$therapy{$gene}}), "\t未检出变异\t", join("/", @{$cancer{$gene}}), "\n";
    }

}

sub info {

    open INFO, "$database_path/info.csv";
    # 读取并解析表头
    my $header = <INFO>;
    chomp($header);
    my @column_names = split(',', $header);
    while (<INFO>) {
        chomp;
        my @line = split(/,/, $_);
        # 将数据与表头对应
        my %record;
        @record{@column_names} = @line;
        if ($record{'project'} eq $project) {
            if ($record{'mutation'} ne "NA") {
                @muts = split(/\//, $record{'mutation'});
            }
        }
    }
    return \@muts
}