2023-11-01 10:09:29 +08:00
|
|
|
|
#!/usr/bin/env perl
|
|
|
|
|
|
use strict;
|
|
|
|
|
|
use warnings;
|
|
|
|
|
|
|
2023-11-29 15:13:30 +08:00
|
|
|
|
# Command line: exactly three arguments are required -
# the input file, the output name prefix and the cancer type ID.
if (@ARGV != 3) {
    die "useage:perl $0 input output cancer_type";
}
my ($input, $output, $cancer_type) = @ARGV;

# Directory that holds the annotation tables. The DATABASE environment
# variable, when defined, takes precedence over the built-in default.
my $database_path = "/dataseq/jmdna/codes/reportbase";
if (defined $ENV{'DATABASE'}) {
    $database_path = $ENV{'DATABASE'};
}

# Report on STDOUT which directory is being used.
print "Cnv药物注释使用路径:$database_path\n";
|
2023-11-01 10:09:29 +08:00
|
|
|
|
|
2023-11-30 15:31:35 +08:00
|
|
|
|
# Load the targeted-therapy table into %therapy.
#
#   $h1      - header line of the table (chomped); kept because the report
#              header is built from it later in the script.
#   %therapy - {lower-cased column 0}{lower-cased column 1} => list of raw
#              (chomped) table lines.
#
# A line is kept only when column 1 is exactly "Deletion" or "Amplification",
# column 9 is not 'D', and column 2 does not mention
# Leukemia/Lymphoma/Myeloid (case-insensitive).
my $therapy_file = "$database_path/targetTherapy.txt";
open(my $therapy_fh, '<', $therapy_file)
    or die "Cannot open $therapy_file: $!\n";

my $h1 = <$therapy_fh>;
$h1 = '' unless defined $h1;    # empty file: keep $h1 a defined string
chomp $h1;

my %therapy;
while (my $record = <$therapy_fh>) {
    chomp $record;
    my @line = split(/\t/, $record);

    # split() drops trailing empty columns, so a missing field is treated
    # as an empty string instead of triggering uninitialized-value warnings.
    my ($col0, $alteration, $disease, $col9) =
        map { defined $_ ? $_ : '' } @line[0, 1, 2, 9];

    next unless ($alteration eq "Deletion" or $alteration eq "Amplification");
    next if $col9 eq 'D';
    next if $disease =~ /Leukemia|Lymphoma|Myeloid/i;

    push @{ $therapy{lc $col0}{lc $alteration} }, $record;
}
close($therapy_fh);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
## Drug translation information.
# Build %drug: lower-cased drug name => value of column 1 of target_drug.txt.
# Column 0 may list several names separated by "|"; each one becomes a key.
my $drug_file = "$database_path/target_drug.txt";
open(my $drug_fh, '<', $drug_file)
    or die "Cannot open $drug_file: $!\n";

my %drug;
<$drug_fh>;    # discard the header line
while (my $record = <$drug_fh>) {
    chomp $record;
    my ($names, $translated) = split(/\t/, $record);

    # Lines without a (true) value in column 1 carry no translation.
    next unless $translated;

    foreach my $name (split(/\|/, $names)) {
        $drug{lc $name} = $translated;
    }
}
close($drug_fh);
|
|
|
|
|
|
|
|
|
|
|
|
# Translate a drug list through the file-level %drug lookup table.
#
# Argument:
#   $drugs - string of items separated by ","; an item containing "+" is a
#            combination whose components are separated by " + "
#            (whitespace, plus sign, whitespace).
# Returns:
#   The items re-joined with ",", combinations re-joined with " + ".
#   Each name found in %drug (lookup on the lower-cased name) is replaced by
#   its table value; names not in the table are passed through unchanged.
#   An undefined argument yields the empty string.
sub drug {
    my ($drugs) = @_;
    return '' unless defined $drugs;    # nothing to translate

    # Look one name up, falling back to the name itself.
    my $lookup = sub {
        my ($name) = @_;
        return exists $drug{lc $name} ? $drug{lc $name} : $name;
    };

    my @translation;
    foreach my $item (split(/,/, $drugs)) {
        if ($item =~ /\+/) {
            # join() gives '' when the item splits into no components, where
            # string accumulation would have left an undefined value.
            push @translation,
                join(" + ", map { $lookup->($_) } split(/\s+\+\s+/, $item));
        }
        else {
            push @translation, $lookup->($item);
        }
    }
    return join(",", @translation);
}
|
2023-11-29 15:13:30 +08:00
|
|
|
|
|
2023-11-01 10:09:29 +08:00
|
|
|
|
|
2023-11-30 15:31:35 +08:00
|
|
|
|
# Load the OncoTree cancer-type table and expand $cancer_type into the set of
# disease names that belong to its family.
#
#   %dis  - lower-cased name (columns 2 and 4) => the column following it
#           (columns 3 and 5 respectively)
#   %dis2 - ID (column 0) => lower-cased names from columns 2 and 4
#   @id   - column 0 of every data line, in file order
my $dis_file = "$database_path/oncotree.cancertype.20230801.txt";
open(my $dis_fh, '<', $dis_file)
    or die "Cannot open $dis_file: $!\n";
<$dis_fh>;    # discard the header line

my (%dis, @id, %dis2);
while (my $record = <$dis_fh>) {
    chomp $record;
    my @line = split(/\t/, $record);
    $dis{lc $line[2]} = $line[3];
    $dis{lc $line[4]} = $line[5];
    push @{$dis2{$line[0]}}, lc $line[2], lc $line[4];
    push @id, $line[0];
}
close($dis_fh);

foreach my $ID ($cancer_type) {
    # Make sure the entry exists even when nothing in the table matches.
    $dis2{$ID} ||= [];

    my @family;

    # Prefixes of the ID of length 2, 4, 6, ... (the ID itself included when
    # its length is even).
    for (my $len = 2; $len <= length($ID); $len += 2) {
        push @family, substr($ID, 0, $len);
    }

    # Every table ID that starts with $ID. \Q..\E makes the command-line
    # value match literally instead of being interpreted as a regex.
    push @family, grep { /^\Q$ID\E/ } @id;

    foreach my $t (@family) {
        next if $t eq $ID;              # its own names are already in place
        # A prefix may not be present in the table; dereferencing a missing
        # entry here would abort the script under "use strict".
        next unless exists $dis2{$t};
        push @{$dis2{$ID}}, @{$dis2{$t}};
    }
}

# Remove duplicate names from every list, keeping first occurrences in order.
foreach my $key (keys(%dis2)) {
    my %seen;
    @{$dis2{$key}} = grep { !$seen{$_}++ } @{$dis2{$key}};
}
|
2023-11-29 15:13:30 +08:00
|
|
|
|
|
|
|
|
|
|
# Annotate every CNV row of the input file and write two reports:
#   "$output.pos.txt" - rows for which a therapy entry was reported
#   "$output.vus.txt" - rows (per gene) for which nothing was reported
#
# For each row, column 3 is a comma-separated gene list and column 4 is the
# value the copy number is derived from. Each distinct gene of a row is
# handled once.
my $pos_file = "$output.pos.txt";
my $vus_file = "$output.vus.txt";
open(my $pos_fh, '>', $pos_file) or die "Cannot write $pos_file: $!\n";
open(my $vus_fh, '>', $vus_file) or die "Cannot write $vus_file: $!\n";

my (@pos, @vus, @cnv_detected);
open(my $in_fh, '<', $input) or die "Cannot open $input: $!\n";
my $h2 = <$in_fh>;
$h2 = '' unless defined $h2;    # empty input: keep the header defined
chomp $h2;

# NOTE(review): POS rows carry one more trailing column (therapy column 16)
# than this header names - confirm the intended column title before adding it.
my $h = $h2 . "\tcn\tfun_change\t" . join("\t", (split("\t", $h1))[0 .. 9, 14]) . "\tLabel\tDrugCn\tIndication";
print {$pos_fh} "$h\n";
print {$vus_fh} "$h\n";

# Disease names belonging to the requested cancer type, as a lookup set.
my @own_diseases = @{ $dis2{$cancer_type} || [] };
my %is_own_disease = map { lc($_) => 1 } @own_diseases;

while (my $row = <$in_fh>) {
    chomp $row;
    my @line2 = split(/\t/, $row);
    @cnv_detected = split(/,/, $line2[3]);

    # cn = 2 ** (1 + column 4), rounded to the nearest integer.
    # Presumably column 4 is a log2 copy ratio - TODO confirm.
    my $cn = int(0.5 + 2 ** (1 + $line2[4]));

    # Therapy alteration class matching this copy number, if any.
    my $alteration = $cn >= 4 ? 'amplification'
                   : $cn == 0 ? 'deletion'
                   :            undef;

    my %seen_gene;
    foreach my $gene (@cnv_detected) {
        next if $seen_gene{$gene}++;    # each gene only once per row

        my $entries = [];
        if (defined $alteration) {
            my $by_alteration = $therapy{lc $gene};
            $entries = $by_alteration->{$alteration}
                if $by_alteration and $by_alteration->{$alteration};
        }

        my $reported = 0;
        foreach my $entry (@$entries) {
            my @line = split("\t", $entry);
            # split() drops trailing empty columns; restore them so columns
            # up to 16 are always defined (for both alteration classes).
            push @line, ('') x (17 - @line) if @line < 17;

            my $disease = lc $line[2];
            my $dises = exists $dis{$disease} ? $dis{$disease} : 'unknow';
            my $is_own = $is_own_disease{$disease};

            my $label;
            if ($line[14] eq 'A') {
                # Column 14 'A': the label depends on whether the entry's
                # disease is in the cancer type's family. With an empty
                # family list nothing is reported.
                if    ($is_own)       { $label = "适应症"; }
                elsif (@own_diseases) { $label = "非适应症"; }
            }
            elsif ($is_own) {
                $label = ".";
            }
            next unless defined $label;

            push @pos, join("\t",
                $row, $cn, 'Oncogenic', @line[0 .. 9, 14],
                $label, drug($line[3]), $dises, $line[16]);
            $reported = 1;
        }

        # Exactly one VUS line per unreported gene; writing it in a single
        # place avoids emitting the same line twice.
        push @vus, "$row\t$cn\t." unless $reported;
    }
}
close($in_fh);

if (@pos) {
    print {$pos_fh} join("\n", @pos) . "\n";
}
if (@vus) {
    print {$vus_fh} join("\n", @vus) . "\n";
}

# Buffered write errors only surface at close time.
close($pos_fh) or die "Cannot close $pos_file: $!\n";
close($vus_fh) or die "Cannot close $vus_file: $!\n";
|
2023-11-01 10:09:29 +08:00
|
|
|
|
|
|
|
|
|
|
|