262 lines
12 KiB
Perl
262 lines
12 KiB
Perl
|
|
#!/usr/bin/perl
|
|||
|
|
use strict;
|
|||
|
|
use warnings;
|
|||
|
|
|
|||
|
|
# Command-line contract: exactly three positional arguments
# (input table, output file prefix, project name).
# The trailing newline keeps Perl from appending "at ... line N." to the
# usage text.
die "usage: perl $0 input output project\n" unless @ARGV == 3;

my ($input, $output, $project) = @ARGV;

# Cancer-type group code chosen from the project-name prefix
# (case-insensitive): "lung..." -> B2, "crc..." -> B10, anything else -> B1.
# The code is used later as a key into %dis2.
my $cancer_type = $project =~ /^lung/i ? "B2"
                : $project =~ /^crc/i  ? "B10"
                :                        "B1";
|
|||
|
|
|
|||
|
|
## Record point-mutation information in %mut.
## %mut maps column 1 -> column 2 -> column 3 of the CSV; the rest of the
## script looks it up as $mut{$gene}{$protein} and appends the stored value
## to output rows.
my %mut;
{
    my $path = "/dataseq/jmdna/codes/reportbase/snv_indel_mutation.csv";
    open(my $fh, '<', $path) or die "Cannot open $path: $!\n";

    my $header = <$fh>;    # first line is a header and is discarded

    while (my $record = <$fh>) {
        # Strip the line ending: without this, a value sitting in the last
        # column keeps its "\n" and breaks the output row it is appended to.
        chomp $record;
        next unless length $record;    # ignore blank lines

        # Limit -1 keeps trailing empty columns, so an empty third column
        # is stored as "" rather than undef.
        my @field = split(/,/, $record, -1);
        $mut{$field[0]}{$field[1]} = $field[2];
    }
    close $fh;
}
|
|||
|
|
|
|||
|
|
## Record medication (targeted-therapy) information in %therapy.
## %therapy maps column 1 -> column 2 -> list of complete, tab-separated
## rows of the table.  $h1 keeps the table's header line; it is reused when
## the header of the positive-result file is built.
# An earlier copy of the table lived at
# /home/jm001/project/pancancer_controlsample/650/Tissue/DEMO2/Pros/targetTherapy.txt
my $h1;
my %therapy;
{
    my $path = "/dataseq/jmdna/codes/reportbase/targetTherapy.txt";
    open(my $fh, '<', $path) or die "Cannot open $path: $!\n";

    $h1 = <$fh>;
    chomp $h1;

    while (my $record = <$fh>) {
        chomp $record;
        my @field = split("\t", $record);

        # Rows are dropped when column 10 is 'D', when column 6 is 'CSCO',
        # or when column 3 names a leukemia / lymphoma / myeloid disease.
        next if $field[9] eq 'D';
        next if $field[5] eq 'CSCO';
        next if $field[2] =~ /Leukemia|Lymphoma|Myeloid/i;

        push @{ $therapy{$field[0]}{$field[1]} }, $record;
    }
    close $fh;
}
|
|||
|
|
|
|||
|
|
## Drug-name translation table.
## Column 1 holds one or more aliases separated by '|'; column 2 holds the
## translated name.  %drug maps each lower-cased alias to that translation.
my %drug;
{
    my $path = "/dataseq/jmdna/codes/reportbase/target_drug.txt";
    open(my $fh, '<', $path) or die "Cannot open $path: $!\n";

    my $header = <$fh>;    # first line is a header and is discarded

    while (my $record = <$fh>) {
        chomp $record;
        my ($aliases, $translated) = split(/\t/, $record);

        # Rows without a (true-valued) translation are skipped.
        next unless $translated;

        foreach my $alias (split(/\|/, $aliases)) {
            $drug{lc $alias} = $translated;
        }
    }
    close $fh;
}
|
|||
|
|
|
|||
|
|
## Disease-name translation table.
## %dis  maps the lower-cased disease name (column 1) to column 2.
## %dis2 maps each group code found in columns 4 and 5 to the list of
##       disease names (column 1) carrying that code.
my (%dis, %dis2);
{
    my $path = "/dataseq/jmdna/codes/reportbase/cancer_type.txt";
    open(my $fh, '<', $path) or die "Cannot open $path: $!\n";

    my $header = <$fh>;    # first line is a header and is discarded

    while (my $record = <$fh>) {
        chomp $record;
        my @field = split(/\t/, $record);

        $dis{lc $field[0]} = $field[1];

        # A short row has no group columns; skip the missing ones instead of
        # filing the name under an undefined key.
        foreach my $group (grep { defined } @field[3, 4]) {
            push @{ $dis2{$group} }, $field[0];
        }
    }
    close $fh;
}
|
|||
|
|
|
|||
|
|
## Classify every row of the input table into @pos, @neg or @vus.
##
## Handles IN / POS / NEG / VUS keep their bareword names because the
## output section at the end of the script prints to them.
if ($input eq '-') {
    # Two-argument open treated "-" as standard input; keep that working.
    open(IN, '<&', \*STDIN) or die "Cannot read standard input: $!\n";
}
else {
    open(IN, '<', $input) or die "Cannot open $input: $!\n";
}
open(POS, '>', "$output.pos.txt") or die "Cannot write $output.pos.txt: $!\n";
open(NEG, '>', "$output.neg.txt") or die "Cannot write $output.neg.txt: $!\n";
open(VUS, '>', "$output.vus.txt") or die "Cannot write $output.vus.txt: $!\n";

my $h2 = <IN>;    # header line of the input table, reused for the outputs
chomp $h2;

my (@pos, @neg, @vus);

while (my $row = <IN>) {
    chomp $row;
    my @col = split("\t", $row);

    # Both start as '' so that they can safely be used as hash keys even
    # when the row cannot be parsed below.
    my ($protein, $mut_type) = ('', '');
    my $gene = $col[7];

    if ($col[11] =~ /(\w+):(\w+):exon(\d+):c\.(\S+):p\.(\S+)$/) {
        # Copy the captures at once; the next successful match resets them.
        my ($exon, $codon, $aa_change) = ($3, $4, $5);

        if (   $aa_change =~ /\d+X$|\d+\*$/
            or $col[10] eq 'stopgain'
            or $col[10] eq 'frameshift deletion'
            or $col[10] eq 'frameshift insertion')
        {
            $protein = 'Truncating Mutations';
        }
        else {
            $protein = $aa_change;
        }

        $mut_type = $codon =~ /del/ ? "Exon $exon deletion"
                  : $codon =~ /ins/ ? "Exon $exon insertion"
                  :                   "Exon $exon mutation";
    }
    elsif ($col[6] =~ /splicing/) {
        $protein  = 'Truncating Mutations';
        $mut_type = '';
    }
    elsif ($col[10] =~ /skipping/) {
        $protein  = 'Exon 14 skipping Mutations';
        $mut_type = '';
    }
    else {
        # NOTE(review): unparsed rows are echoed to STDOUT without a line
        # ending (the row was chomped) -- looks like leftover debug output.
        print $row;
    }

    ## Mutation absent from %mut: goes to @neg when the annotation columns
    ## call it benign, otherwise to @vus.
    # NOTE(review): the three "benign" checks in this loop read different
    # columns (18/18/13 here, 15/18/13 and 15/15/14 below).  The indices are
    # kept exactly as they were; confirm which ones are intended.
    if (not exists $mut{$gene}{$protein}) {
        if (    $col[18] =~ /benign/i
            and $col[18] !~ /sensitivity|pathogenic|uncertain|\./i
            and $col[13] ne '.')
        {
            push @neg, "$row\t.";
        }
        else {
            push @vus, "$row\t.";
        }
        next;
    }

    my $fun_change = $mut{$gene}{$protein};

    ## Mutation present in %mut and neutral -> @neg; inconclusive -> @vus.
    if ($fun_change =~ /neutral/i) {
        push @neg, "$row\t$fun_change";
        next;
    }
    if ($fun_change =~ /Inconclusive/i) {
        push @vus, "$row\t$fun_change";
        next;
    }

    ## Gene absent from %therapy -> @neg when benign, otherwise @vus.
    if (not exists $therapy{$gene}) {
        if (    $col[15] =~ /benign/i
            and $col[18] !~ /sensitivity|pathogenic|uncertain|\./i
            and $col[13] ne '.')
        {
            push @neg, "$row\t$fun_change";
        }
        else {
            push @vus, "$row\t$fun_change";
        }
        next;
    }

    ## Gene present in %therapy: try every alteration key that could
    ## describe this variant, in this order:
    ##   exact protein change, the generic 'Mutation' key, the residue
    ##   (e.g. V600 from V600E), the residue with an "X" wildcard (V600X),
    ##   and the exon-level description.
    ## Keys are deliberately not de-duplicated, so an entry reachable through
    ## two keys is reported twice, as before.
    my @candidate = ($protein, 'Mutation');
    if ($protein =~ /^(\w\d+)\w$/) {
        push @candidate, $1, $1 . "X";
    }
    push @candidate, $mut_type;

    my $matched = 0;
    foreach my $key (@candidate) {
        next unless exists $therapy{$gene}{$key};

        foreach my $entry (@{ $therapy{$gene}{$key} }) {
            my @rec        = split("\t", $entry);
            my $indication = lc $rec[2];

            # Column 6 of the therapy row is one of the three sources
            # compared here.
            my $approved = (   $rec[5] eq "FDA"
                            or $rec[5] eq "NCCN"
                            or $rec[5] eq "NMPA");

            # The entry applies when it is for "solid tumor" or for a
            # disease listed under this project's cancer-type group.
            my $in_scope = (   $indication eq "solid tumor"
                            or grep { $indication eq lc $_ } @{ $dis2{$cancer_type} });

            my $label;
            if ($approved and $in_scope) {
                $label = "适应症";
            }
            # NOTE(review): this grep is true when ANY listed disease
            # differs.  Because the branch above already handled a match it
            # acts as "none match", except that an empty list makes it
            # false and the entry is then dropped.  Kept as is.
            elsif (    $approved
                   and $indication ne "solid tumor"
                   and grep { $indication ne lc $_ } @{ $dis2{$cancer_type} })
            {
                $label = "非适应症";
            }
            elsif ($in_scope) {
                $label = ".";
            }
            else {
                next;
            }

            push @pos,
                "$row\t$fun_change\t"
              . join("\t", @rec[0 .. 9, 14])
              . "\t$label\t"
              . drug($rec[3]) . "\t"
              . $dis{$indication};
            $matched = 1;
        }
    }
    next if $matched;

    ## No therapy entry applied -> @neg when benign, otherwise @vus.
    if (    $col[15] =~ /benign/i
        and $col[15] !~ /sensitivity|pathogenic|uncertain|\./i
        and $col[14] ne '.')
    {
        push @neg, "$row\t$fun_change";
    }
    else {
        push @vus, "$row\t$fun_change";
    }
}
close IN;
|
|||
|
|
|
|||
|
|
# drug($names)
#
# Translate a comma-separated list of drug names through %drug.
# An item containing "+" is treated as a combination: it is split on
# " + " (a plus sign surrounded by whitespace), each component is
# translated separately, and the components are re-joined with " + ".
# Lookups are case-insensitive; a name without an entry in %drug is passed
# through unchanged.  Returns the translated items joined with ",".
sub drug {
    my ($names) = @_;

    # Translation of one name, falling back to the name itself.
    my $lookup = sub {
        my ($name) = @_;
        return exists $drug{lc $name} ? $drug{lc $name} : $name;
    };

    my @out;
    for my $item (split(/,/, $names)) {
        if ($item !~ /\+/) {
            push @out, $lookup->($item);
            next;
        }
        my @parts = map { $lookup->($_) } split(/\s+\+\s+/, $item);
        push @out, join(" + ", @parts);
    }

    return join(",", @out);
}
|
|||
|
|
|
|||
|
|
## Write the three result tables.  A table is written (header included)
## only when it has at least one row.
# Header of the positive table: the input header, the fun_change column,
# columns 1-10 and 15 of the therapy-table header, then the three columns
# added during classification.
my $h = $h2 . "\tfun_change\t" . join("\t", (split("\t", $h1))[0 .. 9, 14]) . "\tLabel\tDrugCn\tIndication";

if (@pos) {
    print POS "$_\n" for ($h, @pos);
}

if (@neg) {
    print NEG "$_\n" for ($h2 . "\tfun_change", @neg);
}

if (@vus) {
    print VUS "$_\n" for ($h2 . "\tfun_change", @vus);
}

# Buffered write errors (e.g. a full disk) only show up when the handle is
# closed, so check each close instead of relying on the implicit close at
# program exit.
close(POS) or die "Cannot finish writing $output.pos.txt: $!\n";
close(NEG) or die "Cannot finish writing $output.neg.txt: $!\n";
close(VUS) or die "Cannot finish writing $output.vus.txt: $!\n";
|
|||
|
|
|
|||
|
|
|
|||
|
|
|