pipeline/codes/target_therapy_fusion.pl

192 lines
6.2 KiB
Perl
Executable File
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#!/usr/bin/env perl
use strict;
use warnings;
# Command line: <input file> <output prefix> <cancer type ID>.
die "useage:perl $0 input output cancer_type" unless @ARGV == 3;
my ($input, $output, $cancer_type) = @ARGV;
# Annotation tables are read from $ENV{DATABASE} when it is set, otherwise
# from the built-in default directory.
my $database_path = defined $ENV{'DATABASE'} ? $ENV{'DATABASE'} : "/dataseq/jmdna/codes/reportbase";
print "Fusion药物注释使用路径$database_path\n";

# %mut maps lowercased CSV column 1, then lowercased CSV column 0, to CSV
# column 2. It is queried later with "<FUSION> Fusion" and a gene name, and
# the stored value is written to the "Oncogenic" output column.
my %mut;
{
    my $fusion_file = "$database_path/fusion.csv";
    open my $mut_fh, '<', $fusion_file
        or die "Cannot open $fusion_file: $!";
    scalar <$mut_fh>;    # discard the header line
    while (my $row = <$mut_fh>) {
        # Strip the line ending (LF or CRLF) so that a value taken from the
        # last column does not carry a newline into the output rows.
        $row =~ s/\r?\n\z//;
        # Limit -1 keeps trailing empty fields, so an empty third column is
        # stored as '' rather than undef.
        my @line = split(/,/, $row, -1);
        $mut{lc $line[1]}{lc $line[0]} = $line[2];
    }
    close $mut_fh;
}
# $h1 is the header line of targetTherapy.txt (reused for the POS header).
# %therapy maps lowercased column 0, then lowercased column 1, to the list of
# raw (chomped) lines that pass the filter below.
my $h1;
my %therapy;
{
    my $therapy_file = "$database_path/targetTherapy.txt";
    open my $therapy_fh, '<', $therapy_file
        or die "Cannot open $therapy_file: $!";
    $h1 = <$therapy_fh>;
    die "$therapy_file is empty: no header line\n" unless defined $h1;
    chomp $h1;
    while (my $row = <$therapy_fh>) {
        chomp $row;
        my @line = split("\t", $row);
        # Blank or one-column lines can never match /fusion/ in column 1;
        # skip them up front instead of warning on undefined fields.
        next unless defined $line[1];
        # Keep only rows whose column 1 mentions "fusion", whose column 9 is
        # not 'D', and whose column 2 does not name a leukemia, lymphoma or
        # myeloid disease.
        push @{$therapy{lc $line[0]}{lc $line[1]}}, $row
            if ($line[1] =~ /fusion/i and $line[9] ne 'D' and $line[2] !~ /Leukemia|Lymphoma|Myeloid/i);
    }
    close $therapy_fh;
}
## Drug translation information.
# %drug maps each lowercased drug name to its translation. Column 0 of
# target_drug.txt may hold several names separated by "|"; they all share the
# translation in column 1. Rows with an empty or missing column 1 are skipped.
my %drug;
{
    my $drug_file = "$database_path/target_drug.txt";
    open my $drug_fh, '<', $drug_file
        or die "Cannot open $drug_file: $!";
    scalar <$drug_fh>;    # discard the header line
    while (my $row = <$drug_fh>) {
        chomp $row;
        my @line = split(/\t/, $row);
        next unless $line[1];
        foreach my $name (split(/\|/, $line[0])) {
            $drug{lc $name} = $line[1];
        }
    }
    close $drug_fh;
}
# Translate a comma-separated list of drug names using %drug.
#
# Every comma-separated item is looked up case-insensitively in %drug. An
# item containing "+" is treated as a combination: it is split on a plus sign
# that has whitespace on both sides, each component is looked up on its own,
# and the results are re-joined with " + ". Names without an entry in %drug
# are passed through unchanged.
#
# Returns the translated items joined with ",".
sub drug {
    my ($drug_list) = @_;

    # Case-insensitive lookup that falls back to the name as given.
    my $lookup = sub {
        my ($name) = @_;
        my $key = lc $name;
        return exists $drug{$key} ? $drug{$key} : $name;
    };

    my @translation;
    for my $item (split(/,/, $drug_list)) {
        if ($item !~ /\+/) {
            push @translation, $lookup->($item);
            next;
        }
        my @components = map { $lookup->($_) } split(/\s+\+\s+/, $item);
        push @translation, join(" + ", @components);
    }
    return join(",", @translation);
}
# Load the cancer-type table.
#   %dis  : lowercased name (column 2 or 4) => its paired value (column 3 or 5)
#   %dis2 : column 0 (the ID compared against $cancer_type) => lowercased names
#   @id   : column 0 of every row, in file order (may contain repeats)
my (%dis, @id, %dis2);
{
    my $dis_file = "$database_path/oncotree.cancertype.20230801.txt";
    open my $dis_fh, '<', $dis_file
        or die "Cannot open $dis_file: $!";
    scalar <$dis_fh>;    # discard the header line
    while (my $row = <$dis_fh>) {
        chomp $row;
        my @line = split(/\t/, $row);
        $dis{lc $line[2]} = $line[3];
        $dis{lc $line[4]} = $line[5];
        push @{$dis2{$line[0]}}, lc $line[2], lc $line[4];
        push @id, $line[0];
    }
    close $dis_fh;
}
# Extend the name list of the requested cancer type with the names of its
# relatives: every even-length prefix of the ID (2, 4, 6, ... characters) and
# every ID in @id that starts with the requested ID.
# NOTE(review): this looks like a hierarchical code with two characters per
# level, so prefixes are ancestors and longer IDs are descendants - confirm.
foreach my $ID ($cancer_type) {
    my @family;
    for (my $len = 2; $len <= length $ID; $len += 2) {
        push @family, substr($ID, 0, $len);
    }
    # \Q...\E: match the ID literally even if it contains regex metacharacters.
    push @family, (grep {/^\Q$ID\E/} @id);

    # Gather into a temporary list first: a prefix with no row in the table
    # has no %dis2 entry, and dereferencing that undefined value in rvalue
    # context is fatal under "use strict".
    my @names;
    foreach my $t (@family) {
        next unless exists $dis2{$t};
        push @names, @{$dis2{$t}};
    }
    # Always leaves $dis2{$ID} as an array reference, even when it is empty.
    push @{$dis2{$ID}}, @names;
}
# Remove repeated names from every list in %dis2, keeping the first
# occurrence of each name and the original order.
for my $names (values %dis2) {
    my %seen;
    @{$names} = grep { !$seen{$_}++ } @{$names};
}
# Lowercased names of the requested cancer type and its relatives, as a
# lookup set (built once instead of scanning the list for every entry).
my %target_disease;
$target_disease{lc $_} = 1 for @{ $dis2{$cancer_type} || [] };

# Build the .pos.txt rows for one list of therapy entries.
#   $row     - the chomped input line being annotated
#   $sig     - the %mut value for this fusion ("Oncogenic" column)
#   $entries - array ref of raw tab-separated targetTherapy.txt lines
# An entry yields a row when its column 14 is 'A' (labelled "适应症",
# indication, if its column 2 names the requested cancer type or a relative,
# otherwise "非适应症", non-indication), or when column 14 is anything else
# but column 2 still names the requested cancer type (labelled ".").
# Returns the list of rows, possibly empty.
sub _therapy_rows {
    my ($row, $sig, $entries) = @_;
    my @rows;
    foreach my $entry (@{$entries}) {
        my @line = split("\t", $entry);
        # split() drops trailing empty fields; pad so that every column used
        # below (up to 16) is defined and joins as an empty string.
        foreach my $idx (0 .. 16) {
            $line[$idx] = '' unless defined $line[$idx];
        }
        my $disease   = lc $line[2];
        my $in_family = exists $target_disease{$disease};
        my $label;
        if ($line[14] eq 'A') {
            $label = $in_family ? "适应症" : "非适应症";
        }
        elsif ($in_family) {
            $label = ".";
        }
        else {
            next;
        }
        my $disease_name = defined $dis{$disease} ? $dis{$disease} : '';
        push @rows, join("\t", $row, $sig, @line[0 .. 9, 14], $label, drug($line[3]), $disease_name, $line[16]);
    }
    return @rows;
}

open my $in_fh,  '<', $input            or die "Cannot open $input: $!";
open my $pos_fh, '>', "$output.pos.txt" or die "Cannot write $output.pos.txt: $!";
open my $neg_fh, '>', "$output.neg.txt" or die "Cannot write $output.neg.txt: $!";
open my $vus_fh, '>', "$output.vus.txt" or die "Cannot write $output.vus.txt: $!";

my $h2 = <$in_fh>;
$h2 = '' unless defined $h2;    # empty input: still emit the header lines
chomp $h2;
# NOTE(review): POS rows carry four fields after the therapy columns (label,
# translated drugs, disease name, therapy column 16) but only three header
# names are written here - confirm the intended name of the last column.
print {$pos_fh} $h2 . "\tOncogenic\t" . join("\t", (split("\t", $h1))[0 .. 9, 14]) . "\tLabel\tDrugCn\tIndication\n";
print {$neg_fh} $h2 . "\tOncogenic\n";
print {$vus_fh} $h2 . "\tOncogenic\n";

my (@pos, @neg, @vus);
my @column_names = split(/\t/, $h2);
while (my $row = <$in_fh>) {
    chomp $row;
    my @fields = split(/\t/, $row);
    # Map each field to its header column name.
    my %record;
    @record{@column_names} = @fields;

    my $fusion_key = lc "$record{'FUSION'} Fusion";

    # Partner genes of this record that are annotated for this fusion.
    my @annotated;
    foreach my $column ('GENE1', 'GENE2') {
        my $candidate = lc $record{$column};
        push @annotated, $candidate if exists $mut{$fusion_key}{$candidate};
    }
    if (!@annotated) {
        push @vus, "$row\t.";
        next;
    }

    # Choose the gene deterministically rather than by hash order: prefer an
    # annotated partner that has therapy entries, otherwise the first
    # annotated partner (GENE1 before GENE2).
    # NOTE(review): confirm this preference with the report owners.
    my ($gene) = grep { exists $therapy{$_} } @annotated;
    $gene = $annotated[0] unless defined $gene;
    my $sig = $mut{$fusion_key}{$gene};

    if ($sig =~ /neutral/i) {
        push @neg, "$row\t$sig";
        next;
    }

    # Therapy entries are looked up first under the specific fusion name and
    # then under the generic "Fusion" alteration; the same key is used for
    # the existence check and for the iteration.
    my @hits;
    if (exists $therapy{$gene}) {
        foreach my $alteration ($fusion_key, lc "Fusion") {
            next unless exists $therapy{$gene}{$alteration};
            push @hits, _therapy_rows($row, $sig, $therapy{$gene}{$alteration});
        }
    }
    if (@hits) {
        push @pos, @hits;
    }
    else {
        push @vus, "$row\t$sig";
    }
}
close $in_fh;

print {$pos_fh} join("\n", @pos) . "\n" if @pos;
print {$neg_fh} join("\n", @neg) . "\n" if @neg;
print {$vus_fh} join("\n", @vus) . "\n" if @vus;

# Buffered write errors only surface at close, so check it.
close $pos_fh or die "Cannot close $output.pos.txt: $!";
close $neg_fh or die "Cannot close $output.neg.txt: $!";
close $vus_fh or die "Cannot close $output.vus.txt: $!";