pipeline/script/target_therapy_fusion.pl

241 lines
7.9 KiB
Perl
Executable File

#!/usr/bin/env perl
use strict;
use warnings;
# Command-line contract: exactly six positional arguments, in this order.
die "usage:perl $0 input longvcf_input output longvcf_output project cancer_type" unless @ARGV == 6;
my ($input, $longvcf_input, $output, $longvcf_output, $project, $cancer_type) = @ARGV;

# Directory that holds info.csv; the PUBLIC environment variable overrides the default.
my $public_path = defined $ENV{'PUBLIC'} ? $ENV{'PUBLIC'} : "/dataseq/jmdna/codes/public/";

# Gene lists configured for $project in info.csv:
#   column 6 -> @fusion, column 7 -> @longindel, each a "/"-separated list,
#   with the literal "NA" meaning "nothing configured".
# Both stay empty when the project has no row, which disables the
# corresponding processing sections further down.
my (@fusion, @longindel);
{
    my $info_file = "$public_path/info.csv";
    open(my $info_fh, '<', $info_file) or die "Cannot open $info_file: $!";
    <$info_fh>;    # discard the header row

    my $project_seen = 0;
    while (my $row = <$info_fh>) {
        chomp $row;
        my @line = split(/,/, $row);
        next unless defined $line[0] and $line[0] eq $project;
        $project_seen = 1;

        # Short rows leave these columns undefined; treat that like "NA"
        # instead of comparing/splitting undef.
        if (defined $line[6] and $line[6] ne "NA") {
            @fusion = split(/\//, $line[6]);
        }
        if (defined $line[7] and $line[7] ne "NA") {
            @longindel = split(/\//, $line[7]);
        }
    }
    close($info_fh);

    # Not fatal: the script simply has nothing to do, but say so on STDERR.
    warn "Project '$project' not found in $info_file; no fusion/long-indel genes configured\n"
        unless $project_seen;
}
# Load the targeted-therapy knowledge base (tab-separated).
#   $h1      - the header line, reused later to label the positive-result columns
#   %therapy - $therapy{column 0}{column 1} = [ raw lines ... ]
# Only rows whose column 1 mentions "fusion" are kept; rows with column 9
# equal to 'D' or with a column 2 naming Leukemia/Lymphoma/Myeloid are dropped.
my $therapy_file = "/dataseq/jmdna/codes/reportbase/targetTherapy.txt";
open(my $therapy_fh, '<', $therapy_file) or die "Cannot open $therapy_file: $!";
my $h1 = <$therapy_fh>;
$h1 = '' unless defined $h1;    # empty file: keep $h1 usable as a string
chomp $h1;
my %therapy;
while (my $row = <$therapy_fh>) {
    chomp $row;
    my @line = split(/\t/, $row);

    # split drops trailing empty fields, so later columns may be undef;
    # treat them as empty strings so row selection is unchanged but quiet.
    my ($alteration, $disease, $flag) =
        map { defined $_ ? $_ : '' } @line[1, 2, 9];

    next unless $alteration =~ /fusion/i;
    next if $flag eq 'D';
    next if $disease =~ /Leukemia|Lymphoma|Myeloid/i;
    push @{$therapy{$line[0]}{$line[1]}}, $row;
}
close($therapy_fh);
## Drug translation info
# Builds %drug: lower-cased drug name -> translated name.
# Column 0 may list several names separated by "|"; each one maps to the
# translation in column 1. Rows without a translation are skipped.
my $drug_file = "/dataseq/jmdna/codes/reportbase/target_drug.txt";
open(my $drug_fh, '<', $drug_file) or die "Cannot open $drug_file: $!";
my %drug;
<$drug_fh>;    # discard the header row
while (my $row = <$drug_fh>) {
    chomp $row;
    my @line = split(/\t/, $row);
    next unless $line[1];
    foreach my $alias (split(/\|/, $line[0])) {
        $drug{lc $alias} = $line[1];
    }
}
close($drug_fh);
# Translate a comma-separated list of drug names through %drug.
#   - A plain item is looked up case-insensitively; unknown names pass through.
#   - An item containing "+" is a combination: it is split on " + "
#     (whitespace required on both sides), each component is translated
#     on its own, and the parts are re-joined with " + ".
# Returns the translated items joined with ",".
sub drug {
    my ($drugs) = @_;

    # Case-insensitive lookup that falls back to the name as given.
    my $translate = sub {
        my ($name) = @_;
        return exists $drug{lc $name} ? $drug{lc $name} : $name;
    };

    my @translation;
    for my $item (split(/,/, $drugs)) {
        if ($item !~ /\+/) {
            push @translation, $translate->($item);
            next;
        }
        my @parts = map { $translate->($_) } split(/\s+\+\s+/, $item);
        push @translation, join(" + ", @parts);
    }
    return join(",", @translation);
}
=pod
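## Disease translation info (legacy cancer_type.txt loader; not executed because it sits inside this =pod/=cut block)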
open DIS,"/dataseq/jmdna/codes/reportbase/cancer_type.txt";
my (%dis,%dis2);
<DIS>;
while(<DIS>){
chomp;
my @line=split(/\t/);
$dis{lc$line[0]}=$line[1];
push @{$dis2{$line[3]}},$line[0];
push @{$dis2{$line[4]}},$line[0];
}
=cut
# Load the OncoTree cancer-type table (tab-separated).
#   %dis  - lower-cased name (columns 2 and 4) -> value of the following
#           column (3 and 5); used later for the "Indication" output column.
#   %dis2 - column 0 (the type id) -> list of lower-cased names from
#           columns 2 and 4.
#   @id   - every id seen, in file order.
my $dis_file = "/dataseq/jmdna/codes/reportbase/oncotree.cancertype.20230801.txt";
open(my $dis_fh, '<', $dis_file) or die "Cannot open $dis_file: $!";
<$dis_fh>;    # discard the header row
my (%dis, @id, %dis2);
while (my $row = <$dis_fh>) {
    chomp $row;
    # Pad to six defined columns: split drops trailing empty fields, and the
    # original code would otherwise operate on undef.
    my @line = map { defined $_ ? $_ : '' } (split(/\t/, $row, -1))[0 .. 5];
    $dis{lc $line[2]} = $line[3];
    $dis{lc $line[4]} = $line[5];
    push @{$dis2{$line[0]}}, lc $line[2], lc $line[4];
    push @id, $line[0];
}
close($dis_fh);

# Widen the name list of the requested cancer type with its "family":
#   - every even-length prefix of the id (2, 4, 6, ... characters), and
#   - every id in the table that starts with the requested id.
# NOTE(review): this presumably encodes a hierarchy with two characters per
# level (ancestors + descendants) - confirm against the table's id scheme.
foreach my $ID ($cancer_type) {
    my @family;
    for (my $len = 2; $len <= length $ID; $len += 2) {
        push @family, substr($ID, 0, $len);
    }
    # \Q..\E: the id is data, not a pattern.
    push @family, grep { /^\Q$ID\E/ } @id;

    foreach my $t (@family) {
        # A prefix need not be a real id; dereferencing a missing entry
        # would abort under strict refs.
        next unless exists $dis2{$t};
        my @names = @{$dis2{$t}};    # copy first: $t may be $ID itself
        push @{$dis2{$ID}}, @names;
    }
}

# De-duplicate every name list, keeping first occurrences in order.
foreach my $key (keys(%dis2)) {
    my %uniq;
    @{$dis2{$key}} = grep { ++$uniq{$_} < 2 } @{$dis2{$key}};
}
# Fusion section: classify every fusion call in $input that involves one of
# the project's configured genes (@fusion) and write three reports:
#   $output.fusion.pos.txt - calls with a matching therapy entry
#   $output.fusion.neg.txt - calls whose known label contains "neutral"
#   $output.fuison.vus.txt - everything else
# The three files are always created; content is written only when the
# corresponding list is non-empty.
if (@fusion) {
    # Known fusion alterations: $mut{alteration name}{gene} = oncogenicity label.
    my %mut;
    my $mut_file = "/dataseq/jmdna/codes/reportbase/fusion.csv";
    open(my $mut_fh, '<', $mut_file) or die "Cannot open $mut_file: $!";
    <$mut_fh>;    # discard the header row
    while (my $row = <$mut_fh>) {
        chomp $row;    # otherwise a label in the last column keeps its newline
        my @line = split(/,/, $row, -1);
        next if @line < 3;
        $mut{$line[1]}{$line[0]} = $line[2];
    }
    close($mut_fh);

    my $pos_file = "$output.fusion.pos.txt";
    my $neg_file = "$output.fusion.neg.txt";
    # NOTE(review): "fuison" is misspelled, but the name is part of the
    # script's output contract; kept as-is until downstream users are checked.
    my $vus_file = "$output.fuison.vus.txt";

    open(my $in_fh,  '<', $input)    or die "Cannot open $input: $!";
    open(my $pos_fh, '>', $pos_file) or die "Cannot write $pos_file: $!";
    open(my $neg_fh, '>', $neg_file) or die "Cannot write $neg_file: $!";
    open(my $vus_fh, '>', $vus_file) or die "Cannot write $vus_file: $!";

    # Loop-invariant lookup sets.
    my %is_fusion_gene;
    $is_fusion_gene{$_} = 1 for @fusion;
    my %is_cancer_name;    # lower-cased disease names of the sample's cancer type
    if (exists $dis2{$cancer_type}) {
        $is_cancer_name{lc $_} = 1 for @{$dis2{$cancer_type}};
    }

    my (@pos, @neg, @vus);
    my $h2 = '';    # the "#CHROM" header line of the input, if any
    while (my $row = <$in_fh>) {
        next if $row =~ /^##/;
        chomp $row;
        if ($row =~ /^#CHROM/) {
            $h2 = $row;
            next;    # the header is not a data row
        }

        # Columns 13/14 are the two partner genes, column 15 the alteration name.
        my @line = split(/\t/, $row);
        my ($gene1, $gene2, $alt) = map { defined $_ ? $_ : '' } @line[13, 14, 15];
        next unless $is_fusion_gene{$gene1} or $is_fusion_gene{$gene2};

        # Use the partner gene that is actually listed for this alteration
        # (first partner wins when both are) instead of an arbitrary hash key.
        my $alt_key = "$alt Fusion";
        my $known   = exists $mut{$alt_key} ? $mut{$alt_key} : {};
        my ($gene)  = grep { exists $known->{$_} } ($gene1, $gene2);

        if (not defined $gene) {
            push @vus, "$row\t.";
            next;
        }
        my $sig = $known->{$gene};
        if ($sig =~ /neutral/i) {
            push @neg, "$row\t$sig";
            next;
        }
        if (not exists $therapy{$gene}) {
            push @vus, "$row\t$sig";
            next;
        }

        # Therapy entries are looked up first under the specific alteration
        # and then under the generic "Fusion" key; both contribute rows.
        my $matched = 0;
        foreach my $therapy_key ($alt_key, "Fusion") {
            next unless exists $therapy{$gene}{$therapy_key};
            foreach my $entry (@{$therapy{$gene}{$therapy_key}}) {
                # Pad to 15 defined columns (split drops trailing empties).
                my @rec = map { defined $_ ? $_ : '' } (split(/\t/, $entry, -1))[0 .. 14];
                my $same_cancer = exists $is_cancer_name{lc $rec[2]};

                # Column 14 eq 'A' entries are always reported:
                #   same cancer type  -> "适应症"   (indication)
                #   other cancer type -> "非适应症" (off-label), including when
                #                        the sample's cancer type has no names.
                # Any other value is reported only for the same cancer type.
                my $label;
                if ($rec[14] eq 'A') {
                    $label = $same_cancer ? "适应症" : "非适应症";
                }
                elsif ($same_cancer) {
                    $label = ".";
                }
                else {
                    next;
                }

                my $indication = $dis{lc $rec[2]};
                $indication = '' unless defined $indication;
                push @pos, join("\t", $row, $sig, @rec[0 .. 9, 14], $label, drug($rec[3]), $indication);
                $matched = 1;
            }
        }
        push @vus, "$row\t$sig" unless $matched;
    }
    close($in_fh);

    if (@pos) {
        # Header: input header + the same therapy columns that are emitted per row.
        my @therapy_cols = map { defined $_ ? $_ : '' }
            (split(/\t/, (defined $h1 ? $h1 : ''), -1))[0 .. 9, 14];
        my $h = $h2 . "\tOncogenic\t" . join("\t", @therapy_cols) . "\tLabel\tDrugCn\tIndication";
        print {$pos_fh} "$h\n";
        print {$pos_fh} join("\n", @pos) . "\n";
    }
    if (@neg) {
        print {$neg_fh} $h2 . "\tOncogenic\n";
        print {$neg_fh} join("\n", @neg) . "\n";
    }
    if (@vus) {
        print {$vus_fh} $h2 . "\tOncogenic\n";
        print {$vus_fh} join("\n", @vus) . "\n";
    }

    # Buffered write errors only surface at close.
    close($pos_fh) or die "Error writing $pos_file: $!";
    close($neg_fh) or die "Error writing $neg_file: $!";
    close($vus_fh) or die "Error writing $vus_file: $!";
}
# Long-indel section: copy to $longvcf_output every record of $longvcf_input
# whose Gene.refGene annotation (INFO, column 7) names one of the project's
# configured genes (@longindel). The output file is always created; the
# header and records are written only when at least one record is kept.
if (@longindel) {
    my %is_longindel_gene;
    $is_longindel_gene{$_} = 1 for @longindel;

    open(my $in_fh,  '<', $longvcf_input)  or die "Cannot open $longvcf_input: $!";
    open(my $out_fh, '>', $longvcf_output) or die "Cannot write $longvcf_output: $!";

    my @pos;
    my $h2 = '';    # the "#CHROM" header line, if any
    while (my $row = <$in_fh>) {
        chomp $row;
        next if $row =~ /^##/;
        if ($row =~ /^#CHROM/) {
            $h2 = $row;
            next;
        }
        my @line = split(/\t/, $row);

        # Skip records without the annotation. Reading $1 after a failed
        # match would silently reuse the gene captured from an earlier record.
        # [^;]* also accepts the annotation as the last INFO entry.
        next unless defined $line[7] and $line[7] =~ /Gene\.refGene=([^;]*)/;
        my $annotation = $1;

        # Compare whole gene names; an unanchored pattern match would let a
        # configured "MET" hit "METTL3". Multi-gene annotations are assumed to
        # be separated by "," or an escaped ";" (\x3b) - TODO confirm.
        my @hits = grep { $is_longindel_gene{$_} } split(/,|\\x3b/, $annotation);
        next unless @hits;

        # BCL2L11 is only reported at position 111883194; any other matched
        # gene is reported unconditionally.
        if (grep { $_ ne "BCL2L11" } @hits) {
            push @pos, $row;
        }
        elsif ($line[1] == 111883194) {
            push @pos, $row;
        }
    }
    close($in_fh);

    if (@pos) {
        print {$out_fh} "$h2\n";
        print {$out_fh} join("\n", @pos) . "\n";
    }
    close($out_fh) or die "Error writing $longvcf_output: $!";
}