From f527429c1ca4d3f9f829c7b7a8b484cacea60724 Mon Sep 17 00:00:00 2001 From: chaopower Date: Fri, 29 Dec 2023 10:11:01 +0800 Subject: [PATCH] =?UTF-8?q?=E5=BE=AE=E8=B0=83?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- codes/filter_snpindel.pl | 28 +++--- codes/public/hotspot/hotspot_delins.bed | 2 +- codes/public/hotspot/hotspot_snpindel.bed | 2 + codes/target_therapy_cnv.pl | 10 +-- codes/target_therapy_fusion.pl | 20 ++--- codes/target_therapy_longindel.pl | 6 +- codes/target_therapy_snpindel.pl | 100 ++++++++++++---------- codes/vcf_operations.py | 31 ++++--- database/snv_indel_mutation.csv | 3 +- wdl/call_mutation.wdl | 18 +++- wdl/chemo.wdl | 3 - wdl/fusion.wdl | 1 + wdl/hereditary.wdl | 3 - wdl/msi.wdl | 7 -- wdl/neoantigen.wdl | 1 + wdl/pollution.wdl | 5 +- wdl/postprocess.wdl | 3 - wdl/tmb.wdl | 4 +- 18 files changed, 132 insertions(+), 115 deletions(-) create mode 100644 codes/public/hotspot/hotspot_snpindel.bed diff --git a/codes/filter_snpindel.pl b/codes/filter_snpindel.pl index 837caa0..0f50d25 100755 --- a/codes/filter_snpindel.pl +++ b/codes/filter_snpindel.pl @@ -119,9 +119,19 @@ while () { } my $protein; - if ($line[9] eq '.') { - # splicing 位点 或者 MET的 intron 位点 - if ($line[5] =~ /splicing/ or ($line[5] =~ /intron/ and $gene eq "MET")) { + # met 基因 在特定区域 + if (($gene eq "MET") && + ($line[0] eq "chr7") && + (($line[1] > 116411872 && $line[1] <= 116411902) or ($line[2] > 116411872 && $line[2] <= 116411902) or + ($line[1] > 116412043 && $line[1] <= 116412046) or ($line[2] > 116412043 && $line[2] <= 116412046))) { + $line[9] = join(":", ($gene, "NM_000245", "exon14", "c.xxx")); + $line[8] = 'skipping'; + $protein = 'Exon 14 Skipping Mutation'; + + } + elsif ($line[9] eq '.') { + # splicing 位点 + if ($line[5] =~ /splicing/) { my @hgvs = split(/;/, $line[7]); my $hgvs = $hgvs[0]; my $transcript_gene; @@ -132,7 +142,7 @@ while () { $hgvs =~ /(\S+):exon(\d+):c\.(\S+)$/; my $spl = $3; my $exon = $2; - # splicing 位点 的前后 1-2bp 的 或者 MET的基因 + # splicing 位点 的前后 1-2bp 的 if ($spl =~ /\d+\+[1|2]\D+/) { my $intron = $exon; $hgvs =~ s/exon(\d+)/exon$exon;intron$intron/; @@ -146,10 +156,6 @@ while () { else { push @reason, 'not_need_spl'; } - if ($gene eq "MET") { - $line[9] = join(":", ($gene, "NM_000245", "exon14", "c.xxx")); - $line[8] = 'skipping' - } $protein = 'Truncating Mutations'; } else { @@ -198,8 +204,8 @@ while () { my ($oncogenic_col, $mut_effect_col); my $get_key = "$gene\_$protein"; - if (exists $oncogenic{$get_key}) { - my @get_values = split('&&', $oncogenic{$get_key}); + if (exists $oncogenic{lc $get_key}) { + my @get_values = split('&&', $oncogenic{lc $get_key}); $oncogenic_col = $get_values[0]; $mut_effect_col = $get_values[1]; } @@ -313,7 +319,7 @@ sub get_oncogenic { chomp; my @line = split(","); my $key = join("_", @line[0, 1]); - $sig{$key} = join("&&", @line[2, 3]); + $sig{lc $key} = join("&&", @line[2, 3]); } return %sig; } diff --git a/codes/public/hotspot/hotspot_delins.bed b/codes/public/hotspot/hotspot_delins.bed index 8bafd7b..4e48a74 100644 --- a/codes/public/hotspot/hotspot_delins.bed +++ b/codes/public/hotspot/hotspot_delins.bed @@ -1,3 +1,3 @@ chr7 55242414 55242513 chr7 55248985 55249172 -chr17 37880978 37881164 +chr17 37880978 37881164 \ No newline at end of file diff --git a/codes/public/hotspot/hotspot_snpindel.bed b/codes/public/hotspot/hotspot_snpindel.bed new file mode 100644 index 0000000..715aab9 --- /dev/null +++ b/codes/public/hotspot/hotspot_snpindel.bed @@ -0,0 +1,2 @@ +chr7 116411872 116411902 +chr7 116412043 116412046 \ No newline at end of file diff --git a/codes/target_therapy_cnv.pl b/codes/target_therapy_cnv.pl index 42af8d3..f14c075 100755 --- a/codes/target_therapy_cnv.pl +++ b/codes/target_therapy_cnv.pl @@ -16,7 +16,7 @@ my %therapy; while () { chomp; my @line = split("\t"); - push @{$therapy{$line[0]}{$line[1]}}, $_ if (($line[1] eq "Deletion" or $line[1] eq "Amplification") and $line[9] ne 'D' and $line[2] !~ /Leukemia|Lymphoma|Myeloid/i); + push @{$therapy{lc $line[0]}{lc $line[1]}}, $_ if (($line[1] eq "Deletion" or $line[1] eq "Amplification") and $line[9] ne 'D' and $line[2] !~ /Leukemia|Lymphoma|Myeloid/i); } @@ -105,8 +105,8 @@ while () { next if $uniq{$cnv_detected} > 1; my $bool = 0; my $cn = int(0.5 + 2 ** (1 + $line2[4])); - if ($cn >= 4 and exists $therapy{$cnv_detected}{'Amplification'}) { - foreach my $entry (@{$therapy{$cnv_detected}{'Amplification'}}) { + if ($cn >= 4 and exists $therapy{lc $cnv_detected}{lc 'Amplification'}) { + foreach my $entry (@{$therapy{lc $cnv_detected}{lc 'Amplification'}}) { my @line = split("\t", $entry); my $dises; if (!(exists $dis{lc $line[2]})) { @@ -130,8 +130,8 @@ while () { } } } - elsif ($cn == 0 and exists $therapy{$cnv_detected}{'Deletion'}) { - foreach my $entry (@{$therapy{$cnv_detected}{'Deletion'}}) { + elsif ($cn == 0 and exists $therapy{lc $cnv_detected}{lc 'Deletion'}) { + foreach my $entry (@{$therapy{lc $cnv_detected}{lc 'Deletion'}}) { my @line = split("\t", $entry); my $dises; if (!(exists $dis{lc $line[2]})) { diff --git a/codes/target_therapy_fusion.pl b/codes/target_therapy_fusion.pl index 36b23c9..79cd08f 100755 --- a/codes/target_therapy_fusion.pl +++ b/codes/target_therapy_fusion.pl @@ -13,7 +13,7 @@ open MUT, "$database_path/fusion.csv"; my %mut; while () { my @line = split(/,/); - $mut{$line[1]}{$line[0]} = $line[2]; + $mut{lc $line[1]}{lc $line[0]} = $line[2]; } open THERAPY, "$database_path/targetTherapy.txt"; @@ -23,7 +23,7 @@ my %therapy; while () { chomp; my @line = split("\t"); - push @{$therapy{$line[0]}{$line[1]}}, $_ if ($line[1] =~ /fusion/i and $line[9] ne 'D' and $line[2] !~ /Leukemia|Lymphoma|Myeloid/i); + push @{$therapy{lc $line[0]}{lc $line[1]}}, $_ if ($line[1] =~ /fusion/i and $line[9] ne 'D' and $line[2] !~ /Leukemia|Lymphoma|Myeloid/i); } ##药物翻译信息 @@ -109,24 +109,24 @@ while () { # 将数据与表头对应 my %record; @record{@column_names} = @splitline; - if (not exists $mut{"$record{'FUSION'} Fusion"}{$record{'GENE1'}} and not exists $mut{"$record{'FUSION'} Fusion"}{$record{'GENE2'}}) { + if (not exists $mut{lc "$record{'FUSION'} Fusion"}{lc $record{'GENE1'}} and not exists $mut{lc "$record{'FUSION'} Fusion"}{lc $record{'GENE2'}}) { push @vus, "$_\t."; } else { - my $gene = (keys %{$mut{"$record{'FUSION'} Fusion"}})[0]; - my $sig = $mut{"$record{'FUSION'} Fusion"}{$gene}; + my $gene = (keys %{$mut{lc "$record{'FUSION'} Fusion"}})[0]; + my $sig = $mut{lc "$record{'FUSION'} Fusion"}{lc $gene}; if ($sig =~ /neutral/i) { push @neg, "$_\t$sig"; } else { - if (not exists $therapy{$gene}) { + if (not exists $therapy{lc $gene}) { push @vus, "$_\t$sig"; } else { my $bool = 0; ## - if (exists $therapy{$gene}{"$record{'FUSION'} Fusion"}) { - foreach my $entry (@{$therapy{$gene}{"$record{'FUSION'}Fusion"}}) { + if (exists $therapy{lc $gene}{lc "$record{'FUSION'} Fusion"}) { + foreach my $entry (@{$therapy{lc $gene}{lc "$record{'FUSION'}Fusion"}}) { my @line = split("\t", $entry); if (($line[14] eq 'A') and (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}})) { push @pos, "$_\t$sig\t" . join("\t", @line[0 .. 9, 14]) . "\t适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; @@ -143,8 +143,8 @@ while () { } } ## - if (exists $therapy{$gene}{"Fusion"}) { - foreach my $entry (@{$therapy{$gene}{"Fusion"}}) { + if (exists $therapy{lc $gene}{lc "Fusion"}) { + foreach my $entry (@{$therapy{lc $gene}{lc "Fusion"}}) { my @line = split("\t", $entry); if (($line[14] eq 'A') and (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}})) { push @pos, "$_\t$sig\t" . join("\t", @line[0 .. 9, 14]) . "\t适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; diff --git a/codes/target_therapy_longindel.pl b/codes/target_therapy_longindel.pl index a467028..230e7ae 100755 --- a/codes/target_therapy_longindel.pl +++ b/codes/target_therapy_longindel.pl @@ -27,7 +27,7 @@ while () { chomp; my @line = split("\t"); # push @{$therapy{$line[0]}{$line[1]}}, $_ if ($line[1] =~ /fusion/i and $line[9] ne 'D' and $line[2] !~ /Leukemia|Lymphoma|Myeloid/i); - push @{$therapy{$line[0]}{$line[1]}}, $_ if ($line[9] ne 'D' and $line[2] !~ /Leukemia|Lymphoma|Myeloid/i); + push @{$therapy{lc $line[0]}{lc $line[1]}}, $_ if ($line[9] ne 'D' and $line[2] !~ /Leukemia|Lymphoma|Myeloid/i); } ##药物翻译信息 @@ -107,9 +107,9 @@ while () { my $freq = (split(/:/, $splitline[9]))[9] / (split(/:/, $splitline[9]))[7]; - if (exists $therapy{'BCL2L11'}{'DELETION POLYMORPHISM'}) { + if (exists $therapy{lc 'BCL2L11'}{lc 'DELETION POLYMORPHISM'}) { print "$freq\n"; - foreach my $entry (@{$therapy{'BCL2L11'}{'DELETION POLYMORPHISM'}}) { + foreach my $entry (@{$therapy{lc 'BCL2L11'}{lc 'DELETION POLYMORPHISM'}}) { my @line = split("\t", $entry); if (($line[14] eq 'A') and (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}})) { diff --git a/codes/target_therapy_snpindel.pl b/codes/target_therapy_snpindel.pl index d0cef2c..8373802 100755 --- a/codes/target_therapy_snpindel.pl +++ b/codes/target_therapy_snpindel.pl @@ -17,7 +17,7 @@ open MUT, "$database_path/snv_indel_mutation.csv"; my %mut; while () { my @line = split(/,/); - $mut{$line[0]}{$line[1]} = $line[2]; + $mut{lc $line[0]}{lc $line[1]} = $line[2]; } ##将用药信息记录到%therapy @@ -28,7 +28,7 @@ my %therapy; while () { chomp; my @line = split("\t"); - push @{$therapy{$line[0]}{$line[1]}}, $_ if ($line[9] ne 'D' and $line[2] !~ /Leukemia|Lymphoma|Myeloid/i); + push @{$therapy{lc $line[0]}{lc $line[1]}}, $_ if ($line[9] ne 'D' and $line[2] !~ /Leukemia|Lymphoma|Myeloid/i); } ##药物翻译信息 @@ -114,12 +114,13 @@ while () { ($codon =~ /dup/) ? ("Exon $exon insertion") : ("Exon $exon mutation"); } - elsif ($record{'Func_refGene'} =~ /splicing/) { - $protein = 'Truncating Mutations'; + elsif ($record{'ExonicFunc_refGene'} =~ /skipping/) { + $protein = 'Exon 14 Skipping Mutation'; + print "$protein\n"; $mut_type = ''; } - elsif ($record{'ExonicFunc_refGene'} =~ /skipping/) { - $protein = 'Exon 14 skipping Mutations'; + elsif ($record{'Func_refGene'} =~ /splicing/) { + $protein = 'Truncating Mutation'; $mut_type = ''; } else { @@ -127,7 +128,7 @@ while () { } ##若突变不存在于%mut,写入@vus,若突变存在于%mut且neutral,写入@neg;若基因不存在于%therapy,写入@vus; - if (not exists $mut{$gene}{$protein}) { + if (not exists $mut{lc $gene}{lc $protein}) { if ($record{'CLNSIG'} =~ /benign/i and $record{'CLNSIG'} !~ /sensitivity|pathogenic|uncertain|\./i and $record{'cosmic91'} ne '.') { push @neg, "$_\t."; } @@ -136,115 +137,120 @@ while () { } } else { - if ($mut{$gene}{$protein} =~ /neutral/i) { - push @neg, "$_\t$mut{$gene}{$protein}"; + if ($mut{lc $gene}{lc $protein} =~ /neutral/i) { + push @neg, "$_\t$mut{lc $gene}{lc $protein}"; } - elsif ($mut{$gene}{$protein} =~ /Inconclusive/i) { - push @vus, "$_\t$mut{$gene}{$protein}"; + elsif ($mut{lc $gene}{lc $protein} =~ /Inconclusive/i) { + push @vus, "$_\t$mut{lc $gene}{lc $protein}"; } else { - if (not exists $therapy{$gene}) { + if (not exists $therapy{lc $gene}) { if ($record{'CLNSIG'} =~ /benign/i and $record{'CLNSIG'} !~ /sensitivity|pathogenic|uncertain|\./i and $record{'cosmic91'} ne '.') { - push @neg, "$_\t$mut{$gene}{$protein}"; + push @neg, "$_\t$mut{lc $gene}{lc $protein}"; } else { - push @vus, "$_\t$mut{$gene}{$protein}"; + push @vus, "$_\t$mut{lc $gene}{lc $protein}"; } } else { my $bool = 0; # 匹配 p. - if (exists $therapy{$gene}{$protein}) { - foreach my $entry (@{$therapy{$gene}{$protein}}) { + if (exists $therapy{lc $gene}{lc $protein}) { + foreach my $entry (@{$therapy{lc $gene}{lc $protein}}) { my @line = split("\t", $entry); if (($line[14] eq 'A') and (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}})) { - push @pos, "$_\t$mut{$gene}{$protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; + push @pos, "$_\t$mut{lc $gene}{lc $protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; $bool = 1; } elsif (($line[14] eq 'A') and (grep {lc $line[2] ne lc $_} @{$dis2{$cancer_type}})) { - push @pos, "$_\t$mut{$gene}{$protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t非适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; + push @pos, "$_\t$mut{lc $gene}{lc $protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t非适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; $bool = 1; } elsif (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}}) { - push @pos, "$_\t$mut{$gene}{$protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t\.\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; + push @pos, "$_\t$mut{lc $gene}{lc $protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t\.\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; $bool = 1; } } } # 匹配 Mutation - if (exists $therapy{$gene}{'Mutation'}) { - foreach my $entry (@{$therapy{$gene}{'Mutation'}}) { + if (exists $therapy{lc $gene}{'mutation'}) { + foreach my $entry (@{$therapy{lc $gene}{'mutation'}}) { my @line = split("\t", $entry); if (($line[14] eq 'A') and (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}})) { - push @pos, "$_\t$mut{$gene}{$protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; + push @pos, "$_\t$mut{lc $gene}{lc $protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; $bool = 1; } elsif (($line[14] eq 'A') and (grep {lc $line[2] ne lc $_} @{$dis2{$cancer_type}})) { - push @pos, "$_\t$mut{$gene}{$protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t非适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; + push @pos, "$_\t$mut{lc $gene}{lc $protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t非适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; $bool = 1; } elsif (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}}) { - push @pos, "$_\t$mut{$gene}{$protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t\.\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; + push @pos, "$_\t$mut{lc $gene}{lc $protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t\.\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; $bool = 1; } } } # 去掉最后一个字符 例如V600E 去掉之后 V600再去匹配 - if ($protein =~ /^(\w\d+)\w$/ and exists $therapy{$gene}{$1}) { - foreach my $entry (@{$therapy{$gene}{$1}}) { + if ($protein =~ /^(\w\d+)\w$/ and exists $therapy{lc $gene}{lc $1}) { + foreach my $entry (@{$therapy{lc $gene}{lc $1}}) { my @line = split("\t", $entry); if (($line[14] eq 'A') and (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}})) { - push @pos, "$_\t$mut{$gene}{$protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; + push @pos, "$_\t$mut{lc $gene}{lc $protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; $bool = 1; } elsif (($line[14] eq 'A') and (grep {lc $line[2] ne lc $_} @{$dis2{$cancer_type}})) { - push @pos, "$_\t$mut{$gene}{$protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t非适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; + push @pos, "$_\t$mut{lc $gene}{lc $protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t非适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; $bool = 1; } elsif (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}}) { - push @pos, "$_\t$mut{$gene}{$protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t\.\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; + push @pos, "$_\t$mut{lc $gene}{lc $protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t\.\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; $bool = 1; } } } # 去掉最后一个字符 加上“.X”去匹配 - if ($protein =~ /^(\w\d+)\w$/ and exists $therapy{$gene}{$1 . "X"}) { - foreach my $entry (@{$therapy{$gene}{$1 . "X"}}) { + if ($protein =~ /^(\w\d+)\w$/ and exists $therapy{lc $gene}{lc $1 . "X"}) { + foreach my $entry (@{$therapy{lc $gene}{lc $1 . "X"}}) { my @line = split("\t", $entry); if (($line[14] eq 'A') and (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}})) { - push @pos, "$_\t$mut{$gene}{$protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; + push @pos, "$_\t$mut{lc $gene}{lc $protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; $bool = 1; } elsif (($line[14] eq 'A') and (grep {lc $line[2] ne lc $_} @{$dis2{$cancer_type}})) { - push @pos, "$_\t$mut{$gene}{$protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t非适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; + push @pos, "$_\t$mut{lc $gene}{lc $protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t非适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; $bool = 1; } elsif (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}}) { - push @pos, "$_\t$mut{$gene}{$protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t\.\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; + push @pos, "$_\t$mut{lc $gene}{lc $protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t\.\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; $bool = 1; } } } # 外显子 模式去匹配 - if (exists $therapy{$gene}{$mut_type}) { - foreach my $entry(@{$therapy{$gene}{$mut_type}}){ - my @line=split("\t",$entry); - if (($line[14] eq 'A') and (grep{lc$line[2] eq lc$_}@{$dis2{$cancer_type}})){ - push @pos,"$_\t$mut{$gene}{$protein}\t".join("\t",@line[0..9,14])."\t适应症"."\t".&drug($line[3])."\t".$dis{lc$line[2]};$bool=1; - }elsif(($line[14] eq 'A') and (grep{lc$line[2] ne lc$_}@{$dis2{$cancer_type}})){ - push @pos,"$_\t$mut{$gene}{$protein}\t".join("\t",@line[0..9,14])."\t非适应症"."\t".&drug($line[3])."\t".$dis{lc$line[2]};$bool=1; - }elsif(grep{lc$line[2] eq lc$_}@{$dis2{$cancer_type}}){ - push @pos,"$_\t$mut{$gene}{$protein}\t".join("\t",@line[0..9,14])."\t\.\t".&drug($line[3])."\t".$dis{lc$line[2]};$bool=1; - } - } + if (exists $therapy{lc $gene}{lc $mut_type}) { + foreach my $entry (@{$therapy{lc $gene}{lc $mut_type}}) { + my @line = split("\t", $entry); + if (($line[14] eq 'A') and (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}})) { + push @pos, "$_\t$mut{lc $gene}{lc $protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; + $bool = 1; + } + elsif (($line[14] eq 'A') and (grep {lc $line[2] ne lc $_} @{$dis2{$cancer_type}})) { + push @pos, "$_\t$mut{lc $gene}{lc $protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t非适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; + $bool = 1; + } + elsif (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}}) { + push @pos, "$_\t$mut{lc $gene}{lc $protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t\.\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; + $bool = 1; + } + } } # 没有匹配上 if ($bool == 0) { if ($record{'CLNSIG'} =~ /benign/i and $record{'CLNSIG'} !~ /sensitivity|pathogenic|uncertain|\./i and $record{'cosmic91'} ne '.') { - push @neg, "$_\t$mut{$gene}{$protein}"; + push @neg, "$_\t$mut{lc $gene}{lc $protein}"; } else { - push @vus, "$_\t$mut{$gene}{$protein}"; + push @vus, "$_\t$mut{lc $gene}{lc $protein}"; } } } diff --git a/codes/vcf_operations.py b/codes/vcf_operations.py index 6c1501d..667721f 100755 --- a/codes/vcf_operations.py +++ b/codes/vcf_operations.py @@ -1,14 +1,16 @@ #! /usr/bin/env python3 import argparse + import pysam class VcfSetOperations: - def __init__(self, vcf_file1, vcf_file2=None, bed_file=None, merge=False): + def __init__(self, vcf_file1, vcf_file2=None, bed_file=None, merge=False, snp=False): self.vcf1 = pysam.VariantFile(vcf_file1) self.vcf2 = pysam.VariantFile(vcf_file2) if vcf_file2 else None self.bed_regions = self.load_bed_regions(bed_file) if bed_file else None + self.snp = snp self.merge = merge def build_record_dict(self, vcf_file): @@ -39,7 +41,7 @@ class VcfSetOperations: for record1 in self.vcf1: key = (record1.contig, record1.pos, record1.ref, record1.alts[0]) - if self.is_within_bed_region(record1) and (records2 is None or key in records2): + if self.is_within_bed_region(record1, self.snp) and (records2 is None or key in records2): output_vcf.write(record1) output_vcf.close() @@ -54,16 +56,24 @@ class VcfSetOperations: output_vcf.write(record2) output_vcf.close() - def is_within_bed_region(self, record): + def is_within_bed_region(self, record, snp=False): + # 如果没有提供 BED 文件,则不筛选 if not self.bed_regions: - return True # 如果没有提供 BED 文件,则不筛选 - # 检查记录是否是点突变 - if len(record.ref) == 1 and len(record.alts[0]) == 1: - return False # 如果是点突变,不考虑 + return True + # 检查记录是否是点突变 # 如果是点突变,不考虑 + if len(record.ref) == 1 and len(record.alts[0]) == 1 and not snp: + return False for (bed_chrom, bed_start, bed_end) in self.bed_regions: + # 如果在 BED 区域内有插入、删除或替代的变异,返回 True if record.contig == bed_chrom and bed_start < record.pos <= bed_end: - return True # 如果在 BED 区域内有插入、删除或替代的变异,返回 True - return False # 如果在 BED 区域内没有插入、删除或替代的变异,返回 False + return True + # 缺失的情况,考虑终点位置在bed区域内 + if len(record.ref) > len(record.alts[0]): + end_pos = record.pos + len(record.ref) + if record.contig == bed_chrom and bed_start < end_pos <= bed_end: + return True + # 如果在 BED 区域内没有插入、删除或替代的变异,返回 False + return False def close_files(self): if self.vcf1: @@ -77,6 +87,7 @@ if __name__ == "__main__": parser.add_argument("vcf1", help="First VCF file") parser.add_argument("-v", "--vcf2", help="Second VCF file for intersection (optional)") parser.add_argument("-b", "--bed", help="BED file for filtering (optional)") + parser.add_argument("-s", "--snp", action="store_true", help="BED region for snp ") parser.add_argument("-m", "--merge", action="store_true", help="Merge two VCF files (optional)") parser.add_argument("-o", "--output", help="Output file (optional)") @@ -90,6 +101,6 @@ if __name__ == "__main__": if not args.vcf2 and not args.bed: print("Error: You need to specify either VCF2 or BED filtering.") else: - vcf_set_operations = VcfSetOperations(args.vcf1, args.vcf2, args.bed) + vcf_set_operations = VcfSetOperations(args.vcf1, args.vcf2, args.bed, args.snp) vcf_set_operations.find_intersection(args.output) vcf_set_operations.close_files() diff --git a/database/snv_indel_mutation.csv b/database/snv_indel_mutation.csv index aef20f2..ab66cf7 100755 --- a/database/snv_indel_mutation.csv +++ b/database/snv_indel_mutation.csv @@ -5297,7 +5297,8 @@ MET,M1250T,Oncogenic,Gain-of-function,4,ENST00000397752,NM_000245.2,Yes,No MET,981_1028splice,Likely Oncogenic,Likely Gain-of-function,4,ENST00000397752,NM_000245.2,Yes,No MET,H1094R,Oncogenic,Gain-of-function,4,ENST00000397752,NM_000245.2,Yes,No MET,Y1230H,Oncogenic,Gain-of-function,4,ENST00000397752,NM_000245.2,Yes,No -MET,Exon 14 Deletion,Likely Oncogenic,Likely Gain-of-function,4,ENST00000397752,NM_000245.2,Yes,No +MET,Exon 14 Skipping Mutation,Likely Oncogenic,Likely Gain-of-function,4,ENST00000397752,NM_000245.2,Yes,No +MET,Exon 14 skipping mutation,Likely Oncogenic,Likely Gain-of-function,4,ENST00000397752,NM_000245.2,Yes,No MET,Exon 14 deletions,Likely Oncogenic,Likely Gain-of-function,4,ENST00000397752,NM_000245.2,Yes,No MET,D1228N,Likely Oncogenic,Likely Gain-of-function,4,ENST00000397752,NM_000245.2,Yes,No MET,X1010_splice,Likely Oncogenic,Likely Gain-of-function,4,ENST00000397752,NM_000245.2,Yes,No diff --git a/wdl/call_mutation.wdl b/wdl/call_mutation.wdl index 4499538..51ca7dd 100755 --- a/wdl/call_mutation.wdl +++ b/wdl/call_mutation.wdl @@ -302,14 +302,24 @@ task hotspot { vcf_operations.py ${output_dir}/mutation/${name}.raw.hotspot.vcf \ -b $PUBLIC/hotspot/hotspot_delins.bed \ - -o ${output_dir}/mutation/${name}.hotspot.delins.vcf + -o ${output_dir}/mutation/${name}.raw.hotspot.delins.vcf + + vcf_operations.py ${output_dir}/mutation/${name}.raw.hotspot.vcf \ + -b $PUBLIC/hotspot/hotspot_snpindel.bed \ + -o ${output_dir}/mutation/${name}.raw.hotspot.snpindel.vcf \ + -s vcf_operations.py ${output_dir}/mutation/${name}.raw.hotspot.vcf \ -v $PUBLIC/hotspot/hotspot_snv.vcf \ - -o ${output_dir}/mutation/${name}.hotspot.snv.vcf + -o ${output_dir}/mutation/${name}.raw.hotspot.snv.vcf - vcf_operations.py ${output_dir}/mutation/${name}.hotspot.snv.vcf \ - -v ${output_dir}/mutation/${name}.hotspot.delins.vcf \ + vcf_operations.py ${output_dir}/mutation/${name}.raw.hotspot.snv.vcf \ + -v ${output_dir}/mutation/${name}.raw.hotspot.delins.vcf \ + -o ${output_dir}/mutation/${name}.raw.hotspot.snv_delins.vcf \ + -m + + vcf_operations.py ${output_dir}/mutation/${name}.raw.hotspot.snv_delins.vcf \ + -v ${output_dir}/mutation/${name}.raw.hotspot.snpindel.vcf \ -o ${output_dir}/mutation/${name}.hotspot.vcf \ -m diff --git a/wdl/chemo.wdl b/wdl/chemo.wdl index 7f9aa5e..35fc2be 100755 --- a/wdl/chemo.wdl +++ b/wdl/chemo.wdl @@ -17,9 +17,6 @@ task run_chemo { >>> - output { - String run_chemo_res = "${output_dir}/chemo/${name}.drug.res.txt" - } } workflow call_chemo { diff --git a/wdl/fusion.wdl b/wdl/fusion.wdl index 7995c58..3eb5478 100755 --- a/wdl/fusion.wdl +++ b/wdl/fusion.wdl @@ -1,3 +1,4 @@ +# fusion task rmdup_picard { String name diff --git a/wdl/hereditary.wdl b/wdl/hereditary.wdl index c2dda62..8b50fe0 100755 --- a/wdl/hereditary.wdl +++ b/wdl/hereditary.wdl @@ -15,9 +15,6 @@ task run_hereditary { >>> - output { - String run_hereditary_txt = "${output_dir}/hereditary/${name}.hereditary.txt" - } } workflow call_hereditary { diff --git a/wdl/msi.wdl b/wdl/msi.wdl index b566ed2..d615c9d 100755 --- a/wdl/msi.wdl +++ b/wdl/msi.wdl @@ -19,9 +19,6 @@ task msi_single { -o ${output_dir}/msi/${name}.msi.txt >>> - output { - String run_msi_txt = "${output_dir}/msi/${name}.msi.txt" - } } task msi_paired { @@ -46,10 +43,6 @@ task msi_paired { >>> - output { - String run_msi_txt = "${output_dir}/msi/${name}.msi.txt" - } - } workflow call_msi { diff --git a/wdl/neoantigen.wdl b/wdl/neoantigen.wdl index a283fe0..87e9087 100755 --- a/wdl/neoantigen.wdl +++ b/wdl/neoantigen.wdl @@ -1,3 +1,4 @@ +# neoantigen task run_neoantigen { String tumor diff --git a/wdl/pollution.wdl b/wdl/pollution.wdl index 737cf8f..7dc4bb1 100755 --- a/wdl/pollution.wdl +++ b/wdl/pollution.wdl @@ -1,3 +1,4 @@ +# pollution task run_pollution { String name @@ -20,10 +21,6 @@ task run_pollution { -c $PUBLIC/pollution/${probe}_contaminate_cnvkit.bed >>> - - output { - String run_pollution_res = "${output_dir}/pollution/${name}_pollution.csv" - } } workflow call_pollution { diff --git a/wdl/postprocess.wdl b/wdl/postprocess.wdl index 34ae89a..e2c7855 100755 --- a/wdl/postprocess.wdl +++ b/wdl/postprocess.wdl @@ -27,9 +27,6 @@ task run_post { >>> - output { - String run_merged = "${output_dir}/report/${name}.merged_file.xlsx" - } } workflow call_postprocess { diff --git a/wdl/tmb.wdl b/wdl/tmb.wdl index 4b9e93f..6a4f2b5 100755 --- a/wdl/tmb.wdl +++ b/wdl/tmb.wdl @@ -6,6 +6,7 @@ task run_tmb { String project String sample_type String output_dir + command <<< if [ ! -d ${output_dir}/tmb ];then @@ -20,9 +21,6 @@ task run_tmb { tmb >>> - output { - String run_tmb_txt = "${output_dir}/tmb/${name}.tmb.txt" - } } workflow call_tmb {