#!/usr/bin/perl
use strict;
use warnings;

# Splits annotated variant records into three tab-separated report tables:
#   <output>.pos.txt  variants with at least one matching targeted-therapy row
#   <output>.neg.txt  variants recorded as neutral / benign
#   <output>.vus.txt  everything else
#
# Usage: perl <script> input output project
#   input    tab-separated variant table with one header line
#   output   prefix for the three output files
#   project  project name; a leading "lung" or "crc" (case-insensitive)
#            selects the disease group used for on-label matching
#
# NOTE(review): the input column positions used below (6, 7, 10, 11, 13, 14,
# 15, 18) look like an ANNOVAR-style layout -- confirm against the producer.

die "useage:perl $0 input output project" unless @ARGV == 3;
my ($input, $output, $project) = @ARGV;

# Disease-group code; used as a key into %dis2 to find on-label cancer names.
my $cancer_type = ($project =~ /^lung/i) ? "B2"
                : ($project =~ /^crc/i)  ? "B10"
                :                          "B1";

## Point-mutation knowledge base: %mut{gene}{protein change} = third CSV column.
my $mut_fh = open_file('<', "/dataseq/jmdna/codes/reportbase/snv_indel_mutation.csv");
# NOTE(review): the first row is discarded as a header; this read is
# reconstructed from an empty statement in the original -- confirm.
my $mut_header = <$mut_fh>;
my %mut;
while (my $row = <$mut_fh>) {
    # Strip the line ending so it cannot leak into the fun_change output column.
    chomp $row;
    my @line = split(/,/, $row);
    $mut{$line[0]}{$line[1]} = $line[2];
}
close $mut_fh;

## Targeted-therapy table: %therapy{gene}{alteration} = [ raw rows ].
my $therapy_fh = open_file('<', "/dataseq/jmdna/codes/reportbase/targetTherapy.txt");
my $h1 = <$therapy_fh>;
$h1 = '' unless defined $h1;
chomp $h1;
my %therapy;
while (<$therapy_fh>) {
    chomp;
    my @line = split("\t");
    # Rows with column 9 'D', column 5 'CSCO', or a haematological disease
    # name in column 2 are not loaded.
    push @{$therapy{$line[0]}{$line[1]}}, $_
        if ($line[9] ne 'D' and $line[5] ne 'CSCO' and $line[2] !~ /Leukemia|Lymphoma|Myeloid/i);
}
close $therapy_fh;

## Drug-name translations: %drug{lower-cased name} = translated name.
my $drug_fh = open_file('<', "/dataseq/jmdna/codes/reportbase/target_drug.txt");
my %drug;
# NOTE(review): header skip reconstructed from an empty statement -- confirm.
my $drug_header = <$drug_fh>;
while (<$drug_fh>) {
    chomp;
    my @line = split(/\t/);
    next unless $line[1];
    # Column 0 may hold several aliases separated by '|'.
    foreach my $alias (split(/\|/, $line[0])) {
        $drug{lc $alias} = $line[1];
    }
}
close $drug_fh;

## Disease-name translations (%dis) and disease groups (%dis2).
my $dis_fh = open_file('<', "/dataseq/jmdna/codes/reportbase/cancer_type.txt");
my (%dis, %dis2);
# NOTE(review): header skip reconstructed from an empty statement -- confirm.
my $dis_header = <$dis_fh>;
while (<$dis_fh>) {
    chomp;
    my @line = split(/\t/);
    $dis{lc $line[0]} = $line[1];
    # The disease name is filed under both group codes (columns 3 and 4).
    push @{$dis2{$line[3]}}, $line[0];
    push @{$dis2{$line[4]}}, $line[0];
}
close $dis_fh;

my $in_fh  = open_file('<', $input);
my $pos_fh = open_file('>', "$output.pos.txt");
my $neg_fh = open_file('>', "$output.neg.txt");
my $vus_fh = open_file('>', "$output.vus.txt");

my $h2 = <$in_fh>;
$h2 = '' unless defined $h2;
chomp $h2;

my (@pos, @neg, @vus);
while (my $record = <$in_fh>) {
    chomp $record;
    my @line = split("\t", $record);
    my $gene = $line[7];

    # Default to '' so that records we cannot parse use the same (empty) hash
    # key an undefined value would, without "uninitialized" warnings.
    my ($protein, $mut_type) = ('', '');

    if ($line[11] =~ /(\w+):(\w+):exon(\d+):c\.(\S+):p\.(\S+)$/) {
        my ($exon, $codon, $aa_change) = ($3, $4, $5);
        if (   $aa_change =~ /\d+X$|\d+\*$/
            or $line[10] eq 'stopgain'
            or $line[10] eq 'frameshift deletion'
            or $line[10] eq 'frameshift insertion')
        {
            $protein = 'Truncating Mutations';
        }
        else {
            $protein = $aa_change;
        }
        $mut_type = ($codon =~ /del/) ? "Exon $exon deletion"
                  : ($codon =~ /ins/) ? "Exon $exon insertion"
                  :                     "Exon $exon mutation";
    }
    elsif ($line[6] =~ /splicing/) {
        $protein = 'Truncating Mutations';
    }
    elsif ($line[10] =~ /skipping/) {
        $protein = 'Exon 14 skipping Mutations';
    }
    else {
        # Unparseable record: echo it to STDOUT, one per line, then classify
        # it like any other variant that is absent from %mut.
        print "$record\n";
    }

    # NOTE(review): the three benign checks below use different column
    # indices (18/18/13, 15/18/13, 15/15/14). They are preserved exactly as
    # found; confirm which columns are intended.

    ## Variant not in %mut: benign-looking ones go to @neg, the rest to @vus.
    if (not exists $mut{$gene}{$protein}) {
        if (is_benign(\@line, 18, 18, 13)) {
            push @neg, "$record\t.";
        }
        else {
            push @vus, "$record\t.";
        }
        next;
    }

    my $fun_change = $mut{$gene}{$protein};

    if ($fun_change =~ /neutral/i) {
        push @neg, "$record\t$fun_change";
        next;
    }
    if ($fun_change =~ /Inconclusive/i) {
        push @vus, "$record\t$fun_change";
        next;
    }

    ## Gene has no therapy rows at all.
    if (not exists $therapy{$gene}) {
        if (is_benign(\@line, 15, 18, 13)) {
            push @neg, "$record\t$fun_change";
        }
        else {
            push @vus, "$record\t$fun_change";
        }
        next;
    }

    # Alteration keys to look up, in output order: exact protein change, the
    # generic 'Mutation' key, the residue without its substituted amino acid
    # (e.g. V600), the residue with an 'X' wildcard (e.g. V600X), and the
    # exon-level description. Keys are deliberately not de-duplicated, so a
    # key that appears twice yields its rows twice.
    my @alteration_keys = ($protein, 'Mutation');
    if ($protein =~ /^(\w\d+)\w$/) {
        push @alteration_keys, $1, $1 . "X";
    }
    push @alteration_keys, $mut_type;

    my @rows;
    foreach my $key (@alteration_keys) {
        next unless exists $therapy{$gene}{$key};
        push @rows, therapy_rows($record, $fun_change, $therapy{$gene}{$key});
    }

    if (@rows) {
        push @pos, @rows;
    }
    elsif (is_benign(\@line, 15, 15, 14)) {
        push @neg, "$record\t$fun_change";
    }
    else {
        push @vus, "$record\t$fun_change";
    }
}
close $in_fh;

my $h = $h2 . "\tfun_change\t" . join("\t", (split("\t", $h1))[0 .. 9, 14]) . "\tLabel\tDrugCn\tIndication";
if (@pos) {
    print {$pos_fh} "$h\n";
    print {$pos_fh} join("\n", @pos) . "\n";
}
if (@neg) {
    print {$neg_fh} $h2 . "\tfun_change\n";
    print {$neg_fh} join("\n", @neg) . "\n";
}
if (@vus) {
    print {$vus_fh} $h2 . "\tfun_change\n";
    print {$vus_fh} join("\n", @vus) . "\n";
}
# Buffered write errors only surface at close, so check it.
foreach my $out_fh ($pos_fh, $neg_fh, $vus_fh) {
    close($out_fh) or die "cannot close output file: $!\n";
}

# Open $path with the given three-argument mode and return the handle;
# dies with the OS error message if the open fails.
sub open_file {
    my ($mode, $path) = @_;
    open(my $fh, $mode, $path) or die "cannot open '$path' ($mode): $!\n";
    return $fh;
}

# True when column $hit_idx mentions "benign", column $veto_idx mentions none
# of sensitivity/pathogenic/uncertain and contains no literal '.', and column
# $present_idx is not the placeholder '.'.
#   $fields  array ref of the split input record
sub is_benign {
    my ($fields, $hit_idx, $veto_idx, $present_idx) = @_;
    return (   $fields->[$hit_idx] =~ /benign/i
            && $fields->[$veto_idx] !~ /sensitivity|pathogenic|uncertain|\./i
            && $fields->[$present_idx] ne '.') ? 1 : 0;
}

# Build the positive-table rows for one list of raw therapy entries.
#   $record      the chomped input line
#   $fun_change  the %mut value for this variant
#   $entries     array ref of raw tab-separated therapy rows
# Returns a list of output rows (possibly empty). Each row is the record,
# fun_change, therapy columns 0-9 and 14, a label, the translated drug names
# and the translated disease name.
sub therapy_rows {
    my ($record, $fun_change, $entries) = @_;
    my @group_names = @{ $dis2{$cancer_type} || [] };
    my @rows;

    foreach my $entry (@$entries) {
        my @f = split("\t", $entry);
        my $disease  = lc $f[2];
        my $approved = ($f[5] eq "FDA" or $f[5] eq "NCCN" or $f[5] eq "NMPA");
        my $on_label = ($disease eq "solid tumor"
                        or grep { $disease eq lc $_ } @group_names);

        my $label;
        if ($approved and $on_label) {
            $label = "适应症";
        }
        elsif ($approved and @group_names) {
            # Approved but not on-label. The original test only succeeded
            # when the disease group had at least one name, so an empty
            # group still produces no row here.
            $label = "非适应症";
        }
        elsif ($on_label) {
            $label = ".";
        }
        else {
            next;
        }

        # Short therapy rows and unknown diseases print as empty cells.
        my @cols        = map { defined $_ ? $_ : '' } @f[0 .. 9, 14];
        my $disease_out = $dis{$disease};
        $disease_out = '' unless defined $disease_out;

        push @rows, "$record\t$fun_change\t" . join("\t", @cols)
                  . "\t$label\t" . drug($f[3]) . "\t" . $disease_out;
    }
    return @rows;
}

# Translate a comma-separated drug list using %drug. Combination regimens
# written as "A + B" are translated component by component and re-joined
# with " + "; names without a translation are passed through unchanged.
sub drug {
    my $drugs = shift @_;
    return '' unless defined $drugs;

    my @translation;
    foreach my $item (split(/,/, $drugs)) {
        if ($item =~ /\+/) {
            my @parts = map { exists $drug{lc $_} ? $drug{lc $_} : $_ }
                        split(/\s+\+\s+/, $item);
            push @translation, join(" + ", @parts);
        }
        else {
            push @translation, (exists $drug{lc $item}) ? $drug{lc $item} : $item;
        }
    }
    return join(",", @translation);
}