pipeline/script/target_therapy_cnv.pl

#!/usr/bin/env perl
use strict;
use warnings;

# Command-line contract: exactly four positional arguments.
#   input       - tab-delimited table read by the main loop (first line is a header)
#   output      - path prefix; results go to "<output>.pos.txt" and "<output>.vus.txt"
#   project     - value matched against the first column of info.csv
#   cancer_type - code matched against the IDs of the OncoTree table
# The trailing newline keeps Perl from appending "at ... line N." to the usage text.
die "usage: perl $0 input output project cancer_type\n" unless @ARGV == 4;

my ($input, $output, $project, $cancer_type) = @ARGV;

# Load the targeted-therapy knowledge base into %therapy.
#   $h1      - header line of the table (reused later to build the report header)
#   %therapy - $therapy{<column 0>}{<column 1>} = [ raw table lines ... ]
# Only rows whose second column is "Deletion" or "Amplification" are kept;
# rows whose column 9 is 'D' or whose column 2 mentions
# Leukemia/Lymphoma/Myeloid are dropped.
# NOTE(review): column 0 is presumably the gene symbol, column 2 the disease
# name and column 9 a status flag -- confirm against the table's header.
my $therapy_file = "/dataseq/jmdna/codes/reportbase/targetTherapy.txt";
# Fail loudly: without this table every CNV would silently be reported as VUS.
open(my $therapy_fh, '<', $therapy_file) or die "Cannot open $therapy_file: $!";
my $h1 = <$therapy_fh>;
$h1 = '' unless defined $h1;    # empty file: keep $h1 defined for later use
chomp $h1;
my %therapy;
while (my $row = <$therapy_fh>) {
    chomp $row;
    my @line = split("\t", $row);
    my $alteration = $line[1];
    next unless defined $alteration
        and ($alteration eq "Deletion" or $alteration eq "Amplification");
    next if defined $line[9] and $line[9] eq 'D';
    next if defined $line[2] and $line[2] =~ /Leukemia|Lymphoma|Myeloid/i;
    push @{$therapy{$line[0]}{$alteration}}, $row;
}
close $therapy_fh;


## Drug-name translation table
# Builds %drug: lower-cased drug name => translated name (column 1).
# Column 0 may hold several aliases separated by '|'; each alias gets the
# same translation. Rows with an empty/false translation are skipped.
my $drug_file = "/dataseq/jmdna/codes/reportbase/target_drug.txt";
open(my $drug_fh, '<', $drug_file) or die "Cannot open $drug_file: $!";
my %drug;
<$drug_fh>;    # discard the header line
while (my $row = <$drug_fh>) {
    chomp $row;
    my @line = split(/\t/, $row);
    next unless $line[1];
    foreach my $alias (split(/\|/, $line[0])) {
        $drug{lc $alias} = $line[1];
    }
}
close $drug_fh;

# Translate a comma-separated list of drug names through the %drug table.
#   Argument: one string, names separated by ','; a name containing '+' is
#             treated as a combination whose parts are separated by ' + '.
#   Returns:  the same list joined with ',' where every name (or combination
#             part) found in %drug (case-insensitively) is replaced by its
#             translation; unknown names are passed through unchanged.
sub drug {
    my $drugs = shift @_;
    my @translation;
    for my $item (split(/,/, $drugs)) {
        # Single agent: direct lookup, fall back to the original spelling.
        unless ($item =~ /\+/) {
            push @translation, (exists $drug{lc $item}) ? $drug{lc $item} : $item;
            next;
        }
        # Combination: translate each component, then re-join with " + ".
        my $combo;
        for my $part (split(/\s+\+\s+/, $item)) {
            my $name = $part;
            $name = $drug{lc $part} if exists $drug{lc $part};
            $combo .= " + $name";
        }
        $combo =~ s/^ \+ //;    # drop the separator added before the first part
        push @translation, $combo;
    }
    return join(",", @translation);
}
=pod
## Disease translation table (legacy cancer_type.txt loader; disabled by this POD block)
open DIS,"/dataseq/jmdna/codes/reportbase/cancer_type.txt";
my (%dis,%dis2);
<DIS>;
while(<DIS>){
	chomp;
	my @line=split(/\t/);
	$dis{lc$line[0]}=$line[1];
	push @{$dis2{$line[3]}},$line[0];
	push @{$dis2{$line[4]}},$line[0];
}
=cut

# Load the OncoTree cancer-type table.
#   %dis  - lower-cased name (columns 2 and 4) => the value in the following
#           column (3 and 5); used later as the translated disease name
#   @id   - every value seen in column 0, in file order
#   %dis2 - column-0 ID => list of lower-cased names (columns 2 and 4)
# NOTE(review): column 0 looks like a hierarchical code with two characters
# per level -- confirm against the table.
my $oncotree_file = "/dataseq/jmdna/codes/reportbase/oncotree.cancertype.20230801.txt";
open(my $dis_fh, '<', $oncotree_file) or die "Cannot open $oncotree_file: $!";
<$dis_fh>;    # discard the header line
my (%dis, @id, %dis2);
while (my $row = <$dis_fh>) {
    chomp $row;
    my @line = split(/\t/, $row);
    $dis{lc $line[2]} = $line[3];
    $dis{lc $line[4]} = $line[5];
    push @{$dis2{$line[0]}}, lc $line[2], lc $line[4];
    push @id, $line[0];
}
close $dis_fh;

# Extend the name list of the requested cancer type with the names of its
# "family": every even-length prefix of the code (2, 4, 6 ... characters) and
# every ID in the table that starts with the code.
foreach my $ID ($cancer_type) {
    my @family;
    for (my $len = 2; $len <= length($ID); $len += 2) {
        push @family, substr($ID, 0, $len);
    }
    # \Q..\E: the code comes from the command line and must match literally.
    push @family, grep { /^\Q$ID\E/ } @id;
    foreach my $t (@family) {
        # A prefix may be absent from the table; dereferencing a missing
        # entry here would abort the script under "strict refs".
        next unless exists $dis2{$t};
        my @names = @{$dis2{$t}};    # copy first: $t can be $ID itself
        push @{$dis2{$ID}}, @names;
    }
}
# De-duplicate every name list, keeping first occurrences in order.
foreach my $key (keys(%dis2)) {
    my %uniq;
    @{$dis2{$key}} = grep { !$uniq{$_}++ } @{$dis2{$key}};
}
# Build the report rows for one detected CNV from its knowledge-base entries.
#   $record  - the raw input line the CNV came from
#   $cn      - the rounded copy number computed for that line
#   $entries - array ref of raw therapy-table lines for this gene/alteration
# Returns a list of tab-joined rows (possibly empty). An entry is reported
#   - with tag "适应症"   when column 14 is 'A' and its disease (column 2) is
#                         in the name list of the requested cancer type,
#   - with tag "非适应症" when column 14 is 'A' and the disease is not in it,
#   - with tag "."        when column 14 is not 'A' but the disease is in it;
# any other entry is skipped.
sub _annotate_cnv {
    my ($record, $cn, $entries) = @_;
    my @rows;
    foreach my $entry (@{$entries}) {
        my @field = split("\t", $entry);
        my $disease_name = exists $dis{lc $field[2]} ? $dis{lc $field[2]} : 'unknow';
        my $on_label = grep { lc $field[2] eq lc $_ } @{$dis2{$cancer_type}};
        my $tag;
        if ($field[14] eq 'A') {
            $tag = $on_label ? "适应症" : "非适应症";
        }
        elsif ($on_label) {
            $tag = ".";
        }
        else {
            next;
        }
        push @rows, "$record\t$cn\tOncogenic\t" . join("\t", @field[0 .. 9, 14]) . "\t$tag\t" . drug($field[3]) . "\t" . $disease_name;
    }
    return @rows;
}

# Main: find the project's row in info.csv, take its '/'-separated CNV target
# list (column 5), and classify every target gene found in the input table as
# actionable (".pos.txt") or not (".vus.txt").
my $public_path = defined $ENV{'PUBLIC'} ? $ENV{'PUBLIC'} : "/dataseq/jmdna/codes/public/";
my $info_file = "$public_path/info.csv";

# Open both outputs exactly once. Re-opening them with '>' for every info.csv
# row would truncate results already written for an earlier row.
open(my $pos_fh, '>', "$output.pos.txt") or die "Cannot write $output.pos.txt: $!";
open(my $vus_fh, '>', "$output.vus.txt") or die "Cannot write $output.vus.txt: $!";

open(my $info_fh, '<', $info_file) or die "Cannot open $info_file: $!";
<$info_fh>;    # discard the header line
while (my $info_row = <$info_fh>) {
    chomp $info_row;
    my @info = split(/,/, $info_row);
    next unless defined $info[0] and $info[0] eq $project;
    next unless defined $info[5] and $info[5] ne "NA";

    my %is_target = map { $_ => 1 } split(/\//, $info[5]);
    my (@pos, @vus);

    open(my $in_fh, '<', $input) or die "Cannot open $input: $!";
    my $h2 = <$in_fh>;
    $h2 = '' unless defined $h2;
    chomp $h2;
    while (my $record = <$in_fh>) {
        chomp $record;
        my @col = split(/\t/, $record);
        next unless defined $col[3];
        my %seen;
        foreach my $gene (split(/,/, $col[3])) {
            next if $seen{$gene}++;          # each gene once per input line
            next unless $is_target{$gene};
            # Round 2 ** (1 + column 4) to the nearest integer.
            # NOTE(review): column 4 is presumably a log2 ratio -- confirm.
            my $cn = int(0.5 + 2 ** (1 + $col[4]));
            my @hits;
            if ($cn >= 4 and exists $therapy{$gene}{'Amplification'}) {
                @hits = _annotate_cnv($record, $cn, $therapy{$gene}{'Amplification'});
            }
            elsif ($cn == 0 and exists $therapy{$gene}{'Deletion'}) {
                @hits = _annotate_cnv($record, $cn, $therapy{$gene}{'Deletion'});
            }
            if (@hits) {
                push @pos, @hits;
            }
            else {
                # Exactly one VUS row per target gene without a reported entry.
                push @vus, "$record\t$cn\t.";
            }
        }
    }
    close $in_fh;

    if (@pos) {
        my $h = $h2 . "\tcn\tfun_change\t" . join("\t", (split("\t", $h1))[0 .. 9, 14]) . "\t标签\t药物中文名\t疾病中文名";
        print {$pos_fh} "$h\n";
        print {$pos_fh} join("\n", @pos) . "\n";
    }

    if (@vus) {
        print {$vus_fh} $h2 . "\tcn\tfun_change\n";
        print {$vus_fh} join("\n", @vus) . "\n";
    }

    # The first project row with CNV targets is the one reported.
    last;
}
close $info_fh;
close $pos_fh or die "Cannot close $output.pos.txt: $!";
close $vus_fh or die "Cannot close $output.vus.txt: $!";