#!/usr/bin/env perl
#
# Classify gene-fusion calls and filter long-indel calls for one project.
#
# Arguments (all six are required):
#   input           tab-separated fusion table; columns 13, 14 and 15
#                   (0-based) are read as partner gene 1, partner gene 2 and
#                   the fusion name
#   longvcf_input   VCF-like file of long indels; the gene is taken from the
#                   "Gene.refGene=...;" annotation in column 7 (0-based)
#   output          prefix for the three fusion result files
#   longvcf_output  path of the filtered long-indel file
#   project         project identifier looked up in column 0 of info.csv
#   cancer_type     cancer-type identifier looked up in the oncotree table
#
# Outputs:
#   <output>.fusion.pos.txt  fusions with at least one matching therapy entry
#   <output>.fusion.neg.txt  fusions whose recorded significance is neutral
#   <output>.fuison.vus.txt  every other fusion involving a target gene
#   <longvcf_output>         long indels on the project's target genes
#
# Environment:
#   PUBLIC  directory containing info.csv (a built-in default is used if unset)
use strict;
use warnings;

die "useage:perl $0 input longvcf_input output longvcf_output project cancer_type" unless @ARGV == 6;
my ($input, $longvcf_input, $output, $longvcf_output, $project, $cancer_type) = @ARGV;

my $public_path = defined $ENV{'PUBLIC'} ? $ENV{'PUBLIC'} : "/dataseq/jmdna/codes/public/";
my $reportbase  = "/dataseq/jmdna/codes/reportbase";

# Open $path with an explicit $mode ('<' or '>') and return the handle.
# Dies with the OS error instead of continuing on an unopened handle.
sub open_or_die {
    my ($mode, $path) = @_;
    open(my $fh, $mode, $path) or die "Cannot open '$path' ($mode): $!\n";
    return $fh;
}

# Read and discard one line (the column header) from $fh.
sub skip_header {
    my ($fh) = @_;
    my $discarded = <$fh>;
    return;
}

# Close a write handle, dying if buffered output could not be flushed.
sub close_or_die {
    my ($fh, $path) = @_;
    close($fh) or die "Cannot close '$path': $!\n";
    return;
}

## Project configuration: which genes are fusion / long-indel targets.
## Columns 6 and 7 of info.csv hold '/'-separated gene lists, or "NA".
my (@fusion, @longindel);
{
    my $info_path = "$public_path/info.csv";
    my $info_fh   = open_or_die('<', $info_path);
    skip_header($info_fh);
    while (my $row = <$info_fh>) {
        chomp $row;
        my @col = split(/,/, $row);
        next unless defined $col[0] and $col[0] eq $project;

        # A later matching row overrides an earlier one, as before.
        @fusion    = split(/\//, $col[6]) if defined $col[6] and $col[6] ne "NA";
        @longindel = split(/\//, $col[7]) if defined $col[7] and $col[7] ne "NA";
    }
    close $info_fh;
}

## Targeted-therapy table. Only fusion rows are kept, excluding rows whose
## column 9 is 'D' and rows for leukemia / lymphoma / myeloid diseases.
## %therapy maps gene -> alteration -> list of raw table rows.
my $h1;    # header line of the therapy table, reused for the POS header
my %therapy;
{
    my $therapy_fh = open_or_die('<', "$reportbase/targetTherapy.txt");
    $h1 = <$therapy_fh>;
    $h1 = '' unless defined $h1;    # empty table: keep later joins quiet
    chomp $h1;
    while (my $row = <$therapy_fh>) {
        chomp $row;
        my @col = split(/\t/, $row);
        push @{ $therapy{ $col[0] }{ $col[1] } }, $row
            if $col[1] =~ /fusion/i
            and $col[9] ne 'D'
            and $col[2] !~ /Leukemia|Lymphoma|Myeloid/i;
    }
    close $therapy_fh;
}

## Drug name translation table: column 0 holds '|'-separated names and
## column 1 the translation. Lookups are case-insensitive.
my %drug;
{
    my $drug_fh = open_or_die('<', "$reportbase/target_drug.txt");
    skip_header($drug_fh);
    while (my $row = <$drug_fh>) {
        chomp $row;
        my @col = split(/\t/, $row);
        next unless $col[1];
        foreach my $name (split(/\|/, $col[0])) {
            $drug{ lc $name } = $col[1];
        }
    }
    close $drug_fh;
}

# Translate a comma-separated drug list using %drug.
#
# Each item is either a single drug or a combination written "a + b".
# Every component is translated independently and names without a
# translation are passed through unchanged.
#
# Returns the translated list, comma-separated, in the input order.
sub drug {
    my ($drugs) = @_;
    return '' unless defined $drugs;

    my @translation;
    foreach my $item (split(/,/, $drugs)) {
        if ($item =~ /\+/) {
            my @parts = map { exists $drug{ lc $_ } ? $drug{ lc $_ } : $_ }
                split(/\s+\+\s+/, $item);
            push @translation, join(" + ", @parts);
        }
        else {
            push @translation, exists $drug{ lc $item } ? $drug{ lc $item } : $item;
        }
    }
    return join(",", @translation);
}

=pod

Legacy disease translation loader (disabled), kept for reference:

    open DIS,"/dataseq/jmdna/codes/reportbase/cancer_type.txt";
    my (%dis,%dis2);
    <DIS>;
    while(<DIS>){
        chomp;
        my @line=split(/\t/);
        $dis{lc$line[0]}=$line[1];
        push @{$dis2{$line[3]}},$line[0];
        push @{$dis2{$line[4]}},$line[0];
    }

=cut

## Disease table.
##   %dis  lower-cased disease name (columns 2 and 4) -> columns 3 and 5
##   %dis2 identifier (column 0) -> lower-cased disease names of that row
##   @id   every identifier, in file order
my (%dis, @id, %dis2);
{
    my $dis_fh = open_or_die('<', "$reportbase/oncotree.cancertype.20230801.txt");
    skip_header($dis_fh);
    while (my $row = <$dis_fh>) {
        chomp $row;
        my @col = split(/\t/, $row);
        $dis{ lc $col[2] } = $col[3];
        $dis{ lc $col[4] } = $col[5];
        push @{ $dis2{ $col[0] } }, lc $col[2], lc $col[4];
        push @id, $col[0];
    }
    close $dis_fh;
}

# Collect the diseases of every identifier related to $cancer_type: its
# even-length prefixes (2, 4, ... characters) and every identifier that
# starts with it.
# NOTE(review): this presumably encodes a two-characters-per-level
# hierarchy in the identifier -- confirm against the table.
{
    my @family;
    for (my $len = 2; $len <= length($cancer_type); $len += 2) {
        push @family, substr($cancer_type, 0, $len);
    }
    push @family, grep { /^\Q$cancer_type\E/ } @id;

    foreach my $node (@family) {
        next unless exists $dis2{$node};    # prefix is not in the table
        push @{ $dis2{$cancer_type} }, @{ $dis2{$node} };
    }
}

# Drop duplicate disease names, keeping the first occurrence of each.
foreach my $key (keys %dis2) {
    my %seen;
    @{ $dis2{$key} } = grep { !$seen{$_}++ } @{ $dis2{$key} };
}
my @cancer_diseases = exists $dis2{$cancer_type} ? @{ $dis2{$cancer_type} } : ();

# Build the therapy part of a positive record from one therapy table row.
#
#   $entry     raw tab-separated row of the therapy table
#   $diseases  array ref of lower-cased disease names for the sample
#
# Column 14 equal to 'A' always yields a row, labelled on-label when the
# row's disease (column 2) is in $diseases and off-label otherwise. Any
# other value yields a row labelled "." only when the disease is in
# $diseases.
#
# Returns the tab-joined columns 0-9 and 14, the label, the translated
# drugs and the translated disease, or nothing when the row does not apply.
sub therapy_annotation {
    my ($entry, $diseases) = @_;

    my @field   = split(/\t/, $entry);
    my $disease = defined $field[2]  ? $field[2]  : '';
    my $level   = defined $field[14] ? $field[14] : '';
    my $matches = grep { lc $disease eq lc $_ } @{$diseases};

    my $label;
    if ($level eq 'A') {
        $label = $matches ? "适应症" : "非适应症";
    }
    elsif ($matches) {
        $label = ".";
    }
    else {
        return;
    }

    my $indication = $dis{ lc $disease };
    return join("\t",
        (map { defined $_ ? $_ : '' } @field[0 .. 9, 14]),
        $label,
        drug($field[3]),
        defined $indication ? $indication : '');
}

if (@fusion) {
    ## Known fusions: fusion name (column 1) -> gene (column 0) -> column 2.
    my %mut;
    {
        my $mut_fh = open_or_die('<', "$reportbase/fusion.csv");
        skip_header($mut_fh);
        while (my $row = <$mut_fh>) {
            chomp $row;    # keep the line ending out of the last column
            my @col = split(/,/, $row);
            $mut{ $col[1] }{ $col[0] } = $col[2];
        }
        close $mut_fh;
    }

    my $pos_path = "$output.fusion.pos.txt";
    my $neg_path = "$output.fusion.neg.txt";

    # NOTE(review): "fuison" looks like a typo for "fusion"; the name is kept
    # because downstream consumers may depend on it.
    my $vus_path = "$output.fuison.vus.txt";

    my $in_fh  = open_or_die('<', $input);
    my $pos_fh = open_or_die('>', $pos_path);
    my $neg_fh = open_or_die('>', $neg_path);
    my $vus_fh = open_or_die('>', $vus_path);

    my (@pos, @neg, @vus);
    my $h2;    # "#CHROM" header line of the input

    RECORD:
    while (my $record = <$in_fh>) {
        next RECORD if $record =~ /^##/;
        chomp $record;
        if ($record =~ /^#CHROM/) {
            $h2 = $record;
            next RECORD;
        }

        my @col = split(/\t/, $record);
        my ($gene1, $gene2, $alt) = map { defined $_ ? $_ : '' } @col[13, 14, 15];
        next RECORD unless grep { $_ eq $gene1 or $_ eq $gene2 } @fusion;

        # Pick the partner gene that is actually recorded for this fusion.
        my $alt_key = "$alt Fusion";
        my $sig_for = exists $mut{$alt_key} ? $mut{$alt_key} : {};
        my $gene;
        if (exists $sig_for->{$gene1}) {
            $gene = $gene1;
        }
        elsif (exists $sig_for->{$gene2}) {
            $gene = $gene2;
        }
        else {
            push @vus, "$record\t.";
            next RECORD;
        }

        my $sig = defined $sig_for->{$gene} ? $sig_for->{$gene} : '';
        if ($sig =~ /neutral/i) {
            push @neg, "$record\t$sig";
            next RECORD;
        }

        # Entries for this specific fusion come first, then generic ones.
        my @hits;
        if (exists $therapy{$gene}) {
            foreach my $alteration ($alt_key, "Fusion") {
                next unless exists $therapy{$gene}{$alteration};
                foreach my $entry (@{ $therapy{$gene}{$alteration} }) {
                    my $annotation = therapy_annotation($entry, \@cancer_diseases);
                    push @hits, "$record\t$sig\t$annotation" if defined $annotation;
                }
            }
        }

        if (@hits) {
            push @pos, @hits;
        }
        else {
            push @vus, "$record\t$sig";
        }
    }
    close $in_fh;

    my $chrom_header = defined $h2 ? $h2 : '';
    my $therapy_cols = join("\t", map { defined $_ ? $_ : '' } (split(/\t/, $h1))[0 .. 9, 14]);
    my $pos_header   = $chrom_header . "\tOncogenic\t" . $therapy_cols . "\tLabel\tDrugCn\tIndication";
    my $short_header = $chrom_header . "\tOncogenic";

    # The output files are always created; a header is written only when
    # there is at least one record.
    print {$pos_fh} join("\n", $pos_header,   @pos), "\n" if @pos;
    print {$neg_fh} join("\n", $short_header, @neg), "\n" if @neg;
    print {$vus_fh} join("\n", $short_header, @vus), "\n" if @vus;

    close_or_die($pos_fh, $pos_path);
    close_or_die($neg_fh, $neg_path);
    close_or_die($vus_fh, $vus_path);
}

if (@longindel) {
    my @pos;
    my $out_fh = open_or_die('>', $longvcf_output);
    my $in_fh  = open_or_die('<', $longvcf_input);
    my $h2;    # "#CHROM" header line of the input

    while (my $record = <$in_fh>) {
        chomp $record;
        next if $record =~ /^##/;
        if ($record =~ /^#CHROM/) {
            $h2 = $record;
            next;
        }

        my @col = split(/\t/, $record);

        # Take the capture only from a successful match, so a record with no
        # gene annotation can never reuse the previous record's gene.
        next unless defined $col[7] and $col[7] =~ /Gene.refGene=(.*?);/;
        my $gene = $1;

        # Target names are applied as patterns, so they also match inside
        # longer annotation values.
        next unless grep { $gene =~ /$_/ } @longindel;

        if ($gene eq "BCL2L11") {

            # Only the call at this exact position is reported for BCL2L11.
            # NOTE(review): presumably a known deletion polymorphism --
            # confirm the coordinate and genome build.
            push @pos, $record if $col[1] == 111883194;
        }
        else {
            push @pos, $record;
        }
    }
    close $in_fh;

    if (@pos) {
        my $chrom_header = defined $h2 ? $h2 : '';
        print {$out_fh} join("\n", $chrom_header, @pos), "\n";
    }
    close_or_die($out_fh, $longvcf_output);
}