master
chaopower 2023-12-29 10:11:01 +08:00
parent a48795ac8b
commit f527429c1c
18 changed files with 132 additions and 115 deletions

View File

@ -119,9 +119,19 @@ while (<IN>) {
} }
my $protein; my $protein;
if ($line[9] eq '.') { # met 基因 在特定区域
# splicing 位点 或者 MET的 intron 位点 if (($gene eq "MET") &&
if ($line[5] =~ /splicing/ or ($line[5] =~ /intron/ and $gene eq "MET")) { ($line[0] eq "chr7") &&
(($line[1] > 116411872 && $line[1] <= 116411902) or ($line[2] > 116411872 && $line[2] <= 116411902) or
($line[1] > 116412043 && $line[1] <= 116412046) or ($line[2] > 116412043 && $line[2] <= 116412046))) {
$line[9] = join(":", ($gene, "NM_000245", "exon14", "c.xxx"));
$line[8] = 'skipping';
$protein = 'Exon 14 Skipping Mutation';
}
elsif ($line[9] eq '.') {
# splicing 位点
if ($line[5] =~ /splicing/) {
my @hgvs = split(/;/, $line[7]); my @hgvs = split(/;/, $line[7]);
my $hgvs = $hgvs[0]; my $hgvs = $hgvs[0];
my $transcript_gene; my $transcript_gene;
@ -132,7 +142,7 @@ while (<IN>) {
$hgvs =~ /(\S+):exon(\d+):c\.(\S+)$/; $hgvs =~ /(\S+):exon(\d+):c\.(\S+)$/;
my $spl = $3; my $spl = $3;
my $exon = $2; my $exon = $2;
# splicing 位点 的前后 1-2bp 的 或者 MET的基因 # splicing 位点 的前后 1-2bp 的
if ($spl =~ /\d+\+[1|2]\D+/) { if ($spl =~ /\d+\+[1|2]\D+/) {
my $intron = $exon; my $intron = $exon;
$hgvs =~ s/exon(\d+)/exon$exon;intron$intron/; $hgvs =~ s/exon(\d+)/exon$exon;intron$intron/;
@ -146,10 +156,6 @@ while (<IN>) {
else { else {
push @reason, 'not_need_spl'; push @reason, 'not_need_spl';
} }
if ($gene eq "MET") {
$line[9] = join(":", ($gene, "NM_000245", "exon14", "c.xxx"));
$line[8] = 'skipping'
}
$protein = 'Truncating Mutations'; $protein = 'Truncating Mutations';
} }
else { else {
@ -198,8 +204,8 @@ while (<IN>) {
my ($oncogenic_col, $mut_effect_col); my ($oncogenic_col, $mut_effect_col);
my $get_key = "$gene\_$protein"; my $get_key = "$gene\_$protein";
if (exists $oncogenic{$get_key}) { if (exists $oncogenic{lc $get_key}) {
my @get_values = split('&&', $oncogenic{$get_key}); my @get_values = split('&&', $oncogenic{lc $get_key});
$oncogenic_col = $get_values[0]; $oncogenic_col = $get_values[0];
$mut_effect_col = $get_values[1]; $mut_effect_col = $get_values[1];
} }
@ -313,7 +319,7 @@ sub get_oncogenic {
chomp; chomp;
my @line = split(","); my @line = split(",");
my $key = join("_", @line[0, 1]); my $key = join("_", @line[0, 1]);
$sig{$key} = join("&&", @line[2, 3]); $sig{lc $key} = join("&&", @line[2, 3]);
} }
return %sig; return %sig;
} }

View File

@ -0,0 +1,2 @@
chr7 116411872 116411902
chr7 116412043 116412046

View File

@ -16,7 +16,7 @@ my %therapy;
while (<THERAPY>) { while (<THERAPY>) {
chomp; chomp;
my @line = split("\t"); my @line = split("\t");
push @{$therapy{$line[0]}{$line[1]}}, $_ if (($line[1] eq "Deletion" or $line[1] eq "Amplification") and $line[9] ne 'D' and $line[2] !~ /Leukemia|Lymphoma|Myeloid/i); push @{$therapy{lc $line[0]}{lc $line[1]}}, $_ if (($line[1] eq "Deletion" or $line[1] eq "Amplification") and $line[9] ne 'D' and $line[2] !~ /Leukemia|Lymphoma|Myeloid/i);
} }
@ -105,8 +105,8 @@ while (<IN>) {
next if $uniq{$cnv_detected} > 1; next if $uniq{$cnv_detected} > 1;
my $bool = 0; my $bool = 0;
my $cn = int(0.5 + 2 ** (1 + $line2[4])); my $cn = int(0.5 + 2 ** (1 + $line2[4]));
if ($cn >= 4 and exists $therapy{$cnv_detected}{'Amplification'}) { if ($cn >= 4 and exists $therapy{lc $cnv_detected}{lc 'Amplification'}) {
foreach my $entry (@{$therapy{$cnv_detected}{'Amplification'}}) { foreach my $entry (@{$therapy{lc $cnv_detected}{lc 'Amplification'}}) {
my @line = split("\t", $entry); my @line = split("\t", $entry);
my $dises; my $dises;
if (!(exists $dis{lc $line[2]})) { if (!(exists $dis{lc $line[2]})) {
@ -130,8 +130,8 @@ while (<IN>) {
} }
} }
} }
elsif ($cn == 0 and exists $therapy{$cnv_detected}{'Deletion'}) { elsif ($cn == 0 and exists $therapy{lc $cnv_detected}{lc 'Deletion'}) {
foreach my $entry (@{$therapy{$cnv_detected}{'Deletion'}}) { foreach my $entry (@{$therapy{lc $cnv_detected}{lc 'Deletion'}}) {
my @line = split("\t", $entry); my @line = split("\t", $entry);
my $dises; my $dises;
if (!(exists $dis{lc $line[2]})) { if (!(exists $dis{lc $line[2]})) {

View File

@ -13,7 +13,7 @@ open MUT, "$database_path/fusion.csv";
my %mut; my %mut;
while (<MUT>) { while (<MUT>) {
my @line = split(/,/); my @line = split(/,/);
$mut{$line[1]}{$line[0]} = $line[2]; $mut{lc $line[1]}{lc $line[0]} = $line[2];
} }
open THERAPY, "$database_path/targetTherapy.txt"; open THERAPY, "$database_path/targetTherapy.txt";
@ -23,7 +23,7 @@ my %therapy;
while (<THERAPY>) { while (<THERAPY>) {
chomp; chomp;
my @line = split("\t"); my @line = split("\t");
push @{$therapy{$line[0]}{$line[1]}}, $_ if ($line[1] =~ /fusion/i and $line[9] ne 'D' and $line[2] !~ /Leukemia|Lymphoma|Myeloid/i); push @{$therapy{lc $line[0]}{lc $line[1]}}, $_ if ($line[1] =~ /fusion/i and $line[9] ne 'D' and $line[2] !~ /Leukemia|Lymphoma|Myeloid/i);
} }
##药物翻译信息 ##药物翻译信息
@ -109,24 +109,24 @@ while (<IN>) {
# 将数据与表头对应 # 将数据与表头对应
my %record; my %record;
@record{@column_names} = @splitline; @record{@column_names} = @splitline;
if (not exists $mut{"$record{'FUSION'} Fusion"}{$record{'GENE1'}} and not exists $mut{"$record{'FUSION'} Fusion"}{$record{'GENE2'}}) { if (not exists $mut{lc "$record{'FUSION'} Fusion"}{lc $record{'GENE1'}} and not exists $mut{lc "$record{'FUSION'} Fusion"}{lc $record{'GENE2'}}) {
push @vus, "$_\t."; push @vus, "$_\t.";
} }
else { else {
my $gene = (keys %{$mut{"$record{'FUSION'} Fusion"}})[0]; my $gene = (keys %{$mut{lc "$record{'FUSION'} Fusion"}})[0];
my $sig = $mut{"$record{'FUSION'} Fusion"}{$gene}; my $sig = $mut{lc "$record{'FUSION'} Fusion"}{lc $gene};
if ($sig =~ /neutral/i) { if ($sig =~ /neutral/i) {
push @neg, "$_\t$sig"; push @neg, "$_\t$sig";
} }
else { else {
if (not exists $therapy{$gene}) { if (not exists $therapy{lc $gene}) {
push @vus, "$_\t$sig"; push @vus, "$_\t$sig";
} }
else { else {
my $bool = 0; my $bool = 0;
## ##
if (exists $therapy{$gene}{"$record{'FUSION'} Fusion"}) { if (exists $therapy{lc $gene}{lc "$record{'FUSION'} Fusion"}) {
foreach my $entry (@{$therapy{$gene}{"$record{'FUSION'}Fusion"}}) { foreach my $entry (@{$therapy{lc $gene}{lc "$record{'FUSION'}Fusion"}}) {
my @line = split("\t", $entry); my @line = split("\t", $entry);
if (($line[14] eq 'A') and (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}})) { if (($line[14] eq 'A') and (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}})) {
push @pos, "$_\t$sig\t" . join("\t", @line[0 .. 9, 14]) . "\t适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; push @pos, "$_\t$sig\t" . join("\t", @line[0 .. 9, 14]) . "\t适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]};
@ -143,8 +143,8 @@ while (<IN>) {
} }
} }
## ##
if (exists $therapy{$gene}{"Fusion"}) { if (exists $therapy{lc $gene}{lc "Fusion"}) {
foreach my $entry (@{$therapy{$gene}{"Fusion"}}) { foreach my $entry (@{$therapy{lc $gene}{lc "Fusion"}}) {
my @line = split("\t", $entry); my @line = split("\t", $entry);
if (($line[14] eq 'A') and (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}})) { if (($line[14] eq 'A') and (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}})) {
push @pos, "$_\t$sig\t" . join("\t", @line[0 .. 9, 14]) . "\t适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; push @pos, "$_\t$sig\t" . join("\t", @line[0 .. 9, 14]) . "\t适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]};

View File

@ -27,7 +27,7 @@ while (<THERAPY>) {
chomp; chomp;
my @line = split("\t"); my @line = split("\t");
# push @{$therapy{$line[0]}{$line[1]}}, $_ if ($line[1] =~ /fusion/i and $line[9] ne 'D' and $line[2] !~ /Leukemia|Lymphoma|Myeloid/i); # push @{$therapy{$line[0]}{$line[1]}}, $_ if ($line[1] =~ /fusion/i and $line[9] ne 'D' and $line[2] !~ /Leukemia|Lymphoma|Myeloid/i);
push @{$therapy{$line[0]}{$line[1]}}, $_ if ($line[9] ne 'D' and $line[2] !~ /Leukemia|Lymphoma|Myeloid/i); push @{$therapy{lc $line[0]}{lc $line[1]}}, $_ if ($line[9] ne 'D' and $line[2] !~ /Leukemia|Lymphoma|Myeloid/i);
} }
##药物翻译信息 ##药物翻译信息
@ -107,9 +107,9 @@ while (<IN>) {
my $freq = (split(/:/, $splitline[9]))[9] / (split(/:/, $splitline[9]))[7]; my $freq = (split(/:/, $splitline[9]))[9] / (split(/:/, $splitline[9]))[7];
if (exists $therapy{'BCL2L11'}{'DELETION POLYMORPHISM'}) { if (exists $therapy{lc 'BCL2L11'}{lc 'DELETION POLYMORPHISM'}) {
print "$freq\n"; print "$freq\n";
foreach my $entry (@{$therapy{'BCL2L11'}{'DELETION POLYMORPHISM'}}) { foreach my $entry (@{$therapy{lc 'BCL2L11'}{lc 'DELETION POLYMORPHISM'}}) {
my @line = split("\t", $entry); my @line = split("\t", $entry);
if (($line[14] eq 'A') and (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}})) { if (($line[14] eq 'A') and (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}})) {

View File

@ -17,7 +17,7 @@ open MUT, "$database_path/snv_indel_mutation.csv";
my %mut; my %mut;
while (<MUT>) { while (<MUT>) {
my @line = split(/,/); my @line = split(/,/);
$mut{$line[0]}{$line[1]} = $line[2]; $mut{lc $line[0]}{lc $line[1]} = $line[2];
} }
##将用药信息记录到%therapy ##将用药信息记录到%therapy
@ -28,7 +28,7 @@ my %therapy;
while (<THERAPY>) { while (<THERAPY>) {
chomp; chomp;
my @line = split("\t"); my @line = split("\t");
push @{$therapy{$line[0]}{$line[1]}}, $_ if ($line[9] ne 'D' and $line[2] !~ /Leukemia|Lymphoma|Myeloid/i); push @{$therapy{lc $line[0]}{lc $line[1]}}, $_ if ($line[9] ne 'D' and $line[2] !~ /Leukemia|Lymphoma|Myeloid/i);
} }
##药物翻译信息 ##药物翻译信息
@ -114,12 +114,13 @@ while (<IN>) {
($codon =~ /dup/) ? ("Exon $exon insertion") : ($codon =~ /dup/) ? ("Exon $exon insertion") :
("Exon $exon mutation"); ("Exon $exon mutation");
} }
elsif ($record{'Func_refGene'} =~ /splicing/) { elsif ($record{'ExonicFunc_refGene'} =~ /skipping/) {
$protein = 'Truncating Mutations'; $protein = 'Exon 14 Skipping Mutation';
print "$protein\n";
$mut_type = ''; $mut_type = '';
} }
elsif ($record{'ExonicFunc_refGene'} =~ /skipping/) { elsif ($record{'Func_refGene'} =~ /splicing/) {
$protein = 'Exon 14 skipping Mutations'; $protein = 'Truncating Mutation';
$mut_type = ''; $mut_type = '';
} }
else { else {
@ -127,7 +128,7 @@ while (<IN>) {
} }
##若突变不存在于%mut,写入@vus,若突变存在于%mut且neutral写入@neg;若基因不存在于%therapy写入@vus; ##若突变不存在于%mut,写入@vus,若突变存在于%mut且neutral写入@neg;若基因不存在于%therapy写入@vus;
if (not exists $mut{$gene}{$protein}) { if (not exists $mut{lc $gene}{lc $protein}) {
if ($record{'CLNSIG'} =~ /benign/i and $record{'CLNSIG'} !~ /sensitivity|pathogenic|uncertain|\./i and $record{'cosmic91'} ne '.') { if ($record{'CLNSIG'} =~ /benign/i and $record{'CLNSIG'} !~ /sensitivity|pathogenic|uncertain|\./i and $record{'cosmic91'} ne '.') {
push @neg, "$_\t."; push @neg, "$_\t.";
} }
@ -136,115 +137,120 @@ while (<IN>) {
} }
} }
else { else {
if ($mut{$gene}{$protein} =~ /neutral/i) { if ($mut{lc $gene}{lc $protein} =~ /neutral/i) {
push @neg, "$_\t$mut{$gene}{$protein}"; push @neg, "$_\t$mut{lc $gene}{lc $protein}";
} }
elsif ($mut{$gene}{$protein} =~ /Inconclusive/i) { elsif ($mut{lc $gene}{lc $protein} =~ /Inconclusive/i) {
push @vus, "$_\t$mut{$gene}{$protein}"; push @vus, "$_\t$mut{lc $gene}{lc $protein}";
} }
else { else {
if (not exists $therapy{$gene}) { if (not exists $therapy{lc $gene}) {
if ($record{'CLNSIG'} =~ /benign/i and $record{'CLNSIG'} !~ /sensitivity|pathogenic|uncertain|\./i and $record{'cosmic91'} ne '.') { if ($record{'CLNSIG'} =~ /benign/i and $record{'CLNSIG'} !~ /sensitivity|pathogenic|uncertain|\./i and $record{'cosmic91'} ne '.') {
push @neg, "$_\t$mut{$gene}{$protein}"; push @neg, "$_\t$mut{lc $gene}{lc $protein}";
} }
else { else {
push @vus, "$_\t$mut{$gene}{$protein}"; push @vus, "$_\t$mut{lc $gene}{lc $protein}";
} }
} }
else { else {
my $bool = 0; my $bool = 0;
# 匹配 p. # 匹配 p.
if (exists $therapy{$gene}{$protein}) { if (exists $therapy{lc $gene}{lc $protein}) {
foreach my $entry (@{$therapy{$gene}{$protein}}) { foreach my $entry (@{$therapy{lc $gene}{lc $protein}}) {
my @line = split("\t", $entry); my @line = split("\t", $entry);
if (($line[14] eq 'A') and (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}})) { if (($line[14] eq 'A') and (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}})) {
push @pos, "$_\t$mut{$gene}{$protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; push @pos, "$_\t$mut{lc $gene}{lc $protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]};
$bool = 1; $bool = 1;
} }
elsif (($line[14] eq 'A') and (grep {lc $line[2] ne lc $_} @{$dis2{$cancer_type}})) { elsif (($line[14] eq 'A') and (grep {lc $line[2] ne lc $_} @{$dis2{$cancer_type}})) {
push @pos, "$_\t$mut{$gene}{$protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t非适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; push @pos, "$_\t$mut{lc $gene}{lc $protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t非适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]};
$bool = 1; $bool = 1;
} }
elsif (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}}) { elsif (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}}) {
push @pos, "$_\t$mut{$gene}{$protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t\.\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; push @pos, "$_\t$mut{lc $gene}{lc $protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t\.\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]};
$bool = 1; $bool = 1;
} }
} }
} }
# 匹配 Mutation # 匹配 Mutation
if (exists $therapy{$gene}{'Mutation'}) { if (exists $therapy{lc $gene}{'mutation'}) {
foreach my $entry (@{$therapy{$gene}{'Mutation'}}) { foreach my $entry (@{$therapy{lc $gene}{'mutation'}}) {
my @line = split("\t", $entry); my @line = split("\t", $entry);
if (($line[14] eq 'A') and (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}})) { if (($line[14] eq 'A') and (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}})) {
push @pos, "$_\t$mut{$gene}{$protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; push @pos, "$_\t$mut{lc $gene}{lc $protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]};
$bool = 1; $bool = 1;
} }
elsif (($line[14] eq 'A') and (grep {lc $line[2] ne lc $_} @{$dis2{$cancer_type}})) { elsif (($line[14] eq 'A') and (grep {lc $line[2] ne lc $_} @{$dis2{$cancer_type}})) {
push @pos, "$_\t$mut{$gene}{$protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t非适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; push @pos, "$_\t$mut{lc $gene}{lc $protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t非适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]};
$bool = 1; $bool = 1;
} }
elsif (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}}) { elsif (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}}) {
push @pos, "$_\t$mut{$gene}{$protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t\.\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; push @pos, "$_\t$mut{lc $gene}{lc $protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t\.\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]};
$bool = 1; $bool = 1;
} }
} }
} }
# 去掉最后一个字符 例如V600E 去掉之后 V600再去匹配 # 去掉最后一个字符 例如V600E 去掉之后 V600再去匹配
if ($protein =~ /^(\w\d+)\w$/ and exists $therapy{$gene}{$1}) { if ($protein =~ /^(\w\d+)\w$/ and exists $therapy{lc $gene}{lc $1}) {
foreach my $entry (@{$therapy{$gene}{$1}}) { foreach my $entry (@{$therapy{lc $gene}{lc $1}}) {
my @line = split("\t", $entry); my @line = split("\t", $entry);
if (($line[14] eq 'A') and (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}})) { if (($line[14] eq 'A') and (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}})) {
push @pos, "$_\t$mut{$gene}{$protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; push @pos, "$_\t$mut{lc $gene}{lc $protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]};
$bool = 1; $bool = 1;
} }
elsif (($line[14] eq 'A') and (grep {lc $line[2] ne lc $_} @{$dis2{$cancer_type}})) { elsif (($line[14] eq 'A') and (grep {lc $line[2] ne lc $_} @{$dis2{$cancer_type}})) {
push @pos, "$_\t$mut{$gene}{$protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t非适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; push @pos, "$_\t$mut{lc $gene}{lc $protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t非适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]};
$bool = 1; $bool = 1;
} }
elsif (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}}) { elsif (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}}) {
push @pos, "$_\t$mut{$gene}{$protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t\.\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; push @pos, "$_\t$mut{lc $gene}{lc $protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t\.\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]};
$bool = 1; $bool = 1;
} }
} }
} }
# 去掉最后一个字符 加上“.X”去匹配 # 去掉最后一个字符 加上“.X”去匹配
if ($protein =~ /^(\w\d+)\w$/ and exists $therapy{$gene}{$1 . "X"}) { if ($protein =~ /^(\w\d+)\w$/ and exists $therapy{lc $gene}{lc $1 . "X"}) {
foreach my $entry (@{$therapy{$gene}{$1 . "X"}}) { foreach my $entry (@{$therapy{lc $gene}{lc $1 . "X"}}) {
my @line = split("\t", $entry); my @line = split("\t", $entry);
if (($line[14] eq 'A') and (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}})) { if (($line[14] eq 'A') and (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}})) {
push @pos, "$_\t$mut{$gene}{$protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; push @pos, "$_\t$mut{lc $gene}{lc $protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]};
$bool = 1; $bool = 1;
} }
elsif (($line[14] eq 'A') and (grep {lc $line[2] ne lc $_} @{$dis2{$cancer_type}})) { elsif (($line[14] eq 'A') and (grep {lc $line[2] ne lc $_} @{$dis2{$cancer_type}})) {
push @pos, "$_\t$mut{$gene}{$protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t非适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; push @pos, "$_\t$mut{lc $gene}{lc $protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t非适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]};
$bool = 1; $bool = 1;
} }
elsif (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}}) { elsif (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}}) {
push @pos, "$_\t$mut{$gene}{$protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t\.\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]}; push @pos, "$_\t$mut{lc $gene}{lc $protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t\.\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]};
$bool = 1; $bool = 1;
} }
} }
} }
# 外显子 模式去匹配 # 外显子 模式去匹配
if (exists $therapy{$gene}{$mut_type}) { if (exists $therapy{lc $gene}{lc $mut_type}) {
foreach my $entry(@{$therapy{$gene}{$mut_type}}){ foreach my $entry (@{$therapy{lc $gene}{lc $mut_type}}) {
my @line=split("\t",$entry); my @line = split("\t", $entry);
if (($line[14] eq 'A') and (grep{lc$line[2] eq lc$_}@{$dis2{$cancer_type}})){ if (($line[14] eq 'A') and (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}})) {
push @pos,"$_\t$mut{$gene}{$protein}\t".join("\t",@line[0..9,14])."\t适应症"."\t".&drug($line[3])."\t".$dis{lc$line[2]};$bool=1; push @pos, "$_\t$mut{lc $gene}{lc $protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]};
}elsif(($line[14] eq 'A') and (grep{lc$line[2] ne lc$_}@{$dis2{$cancer_type}})){ $bool = 1;
push @pos,"$_\t$mut{$gene}{$protein}\t".join("\t",@line[0..9,14])."\t非适应症"."\t".&drug($line[3])."\t".$dis{lc$line[2]};$bool=1; }
}elsif(grep{lc$line[2] eq lc$_}@{$dis2{$cancer_type}}){ elsif (($line[14] eq 'A') and (grep {lc $line[2] ne lc $_} @{$dis2{$cancer_type}})) {
push @pos,"$_\t$mut{$gene}{$protein}\t".join("\t",@line[0..9,14])."\t\.\t".&drug($line[3])."\t".$dis{lc$line[2]};$bool=1; push @pos, "$_\t$mut{lc $gene}{lc $protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t非适应症" . "\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]};
$bool = 1;
}
elsif (grep {lc $line[2] eq lc $_} @{$dis2{$cancer_type}}) {
push @pos, "$_\t$mut{lc $gene}{lc $protein}\t" . join("\t", @line[0 .. 9, 14]) . "\t\.\t" . &drug($line[3]) . "\t" . $dis{lc $line[2]};
$bool = 1;
} }
} }
} }
# 没有匹配上 # 没有匹配上
if ($bool == 0) { if ($bool == 0) {
if ($record{'CLNSIG'} =~ /benign/i and $record{'CLNSIG'} !~ /sensitivity|pathogenic|uncertain|\./i and $record{'cosmic91'} ne '.') { if ($record{'CLNSIG'} =~ /benign/i and $record{'CLNSIG'} !~ /sensitivity|pathogenic|uncertain|\./i and $record{'cosmic91'} ne '.') {
push @neg, "$_\t$mut{$gene}{$protein}"; push @neg, "$_\t$mut{lc $gene}{lc $protein}";
} }
else { else {
push @vus, "$_\t$mut{$gene}{$protein}"; push @vus, "$_\t$mut{lc $gene}{lc $protein}";
} }
} }
} }

View File

@ -1,14 +1,16 @@
#! /usr/bin/env python3 #! /usr/bin/env python3
import argparse import argparse
import pysam import pysam
class VcfSetOperations: class VcfSetOperations:
def __init__(self, vcf_file1, vcf_file2=None, bed_file=None, merge=False): def __init__(self, vcf_file1, vcf_file2=None, bed_file=None, merge=False, snp=False):
self.vcf1 = pysam.VariantFile(vcf_file1) self.vcf1 = pysam.VariantFile(vcf_file1)
self.vcf2 = pysam.VariantFile(vcf_file2) if vcf_file2 else None self.vcf2 = pysam.VariantFile(vcf_file2) if vcf_file2 else None
self.bed_regions = self.load_bed_regions(bed_file) if bed_file else None self.bed_regions = self.load_bed_regions(bed_file) if bed_file else None
self.snp = snp
self.merge = merge self.merge = merge
def build_record_dict(self, vcf_file): def build_record_dict(self, vcf_file):
@ -39,7 +41,7 @@ class VcfSetOperations:
for record1 in self.vcf1: for record1 in self.vcf1:
key = (record1.contig, record1.pos, record1.ref, record1.alts[0]) key = (record1.contig, record1.pos, record1.ref, record1.alts[0])
if self.is_within_bed_region(record1) and (records2 is None or key in records2): if self.is_within_bed_region(record1, self.snp) and (records2 is None or key in records2):
output_vcf.write(record1) output_vcf.write(record1)
output_vcf.close() output_vcf.close()
@ -54,16 +56,24 @@ class VcfSetOperations:
output_vcf.write(record2) output_vcf.write(record2)
output_vcf.close() output_vcf.close()
def is_within_bed_region(self, record): def is_within_bed_region(self, record, snp=False):
# 如果没有提供 BED 文件,则不筛选
if not self.bed_regions: if not self.bed_regions:
return True # 如果没有提供 BED 文件,则不筛选 return True
# 检查记录是否是点突变 # 检查记录是否是点突变 # 如果是点突变,不考虑
if len(record.ref) == 1 and len(record.alts[0]) == 1: if len(record.ref) == 1 and len(record.alts[0]) == 1 and not snp:
return False # 如果是点突变,不考虑 return False
for (bed_chrom, bed_start, bed_end) in self.bed_regions: for (bed_chrom, bed_start, bed_end) in self.bed_regions:
# 如果在 BED 区域内有插入、删除或替代的变异,返回 True
if record.contig == bed_chrom and bed_start < record.pos <= bed_end: if record.contig == bed_chrom and bed_start < record.pos <= bed_end:
return True # 如果在 BED 区域内有插入、删除或替代的变异,返回 True return True
return False # 如果在 BED 区域内没有插入、删除或替代的变异,返回 False # 缺失的情况考虑终点位置在bed区域内
if len(record.ref) > len(record.alts[0]):
end_pos = record.pos + len(record.ref)
if record.contig == bed_chrom and bed_start < end_pos <= bed_end:
return True
# 如果在 BED 区域内没有插入、删除或替代的变异,返回 False
return False
def close_files(self): def close_files(self):
if self.vcf1: if self.vcf1:
@ -77,6 +87,7 @@ if __name__ == "__main__":
parser.add_argument("vcf1", help="First VCF file") parser.add_argument("vcf1", help="First VCF file")
parser.add_argument("-v", "--vcf2", help="Second VCF file for intersection (optional)") parser.add_argument("-v", "--vcf2", help="Second VCF file for intersection (optional)")
parser.add_argument("-b", "--bed", help="BED file for filtering (optional)") parser.add_argument("-b", "--bed", help="BED file for filtering (optional)")
parser.add_argument("-s", "--snp", action="store_true", help="BED region for snp ")
parser.add_argument("-m", "--merge", action="store_true", help="Merge two VCF files (optional)") parser.add_argument("-m", "--merge", action="store_true", help="Merge two VCF files (optional)")
parser.add_argument("-o", "--output", help="Output file (optional)") parser.add_argument("-o", "--output", help="Output file (optional)")
@ -90,6 +101,6 @@ if __name__ == "__main__":
if not args.vcf2 and not args.bed: if not args.vcf2 and not args.bed:
print("Error: You need to specify either VCF2 or BED filtering.") print("Error: You need to specify either VCF2 or BED filtering.")
else: else:
vcf_set_operations = VcfSetOperations(args.vcf1, args.vcf2, args.bed) vcf_set_operations = VcfSetOperations(args.vcf1, args.vcf2, args.bed, args.snp)
vcf_set_operations.find_intersection(args.output) vcf_set_operations.find_intersection(args.output)
vcf_set_operations.close_files() vcf_set_operations.close_files()

View File

@ -5297,7 +5297,8 @@ MET,M1250T,Oncogenic,Gain-of-function,4,ENST00000397752,NM_000245.2,Yes,No
MET,981_1028splice,Likely Oncogenic,Likely Gain-of-function,4,ENST00000397752,NM_000245.2,Yes,No MET,981_1028splice,Likely Oncogenic,Likely Gain-of-function,4,ENST00000397752,NM_000245.2,Yes,No
MET,H1094R,Oncogenic,Gain-of-function,4,ENST00000397752,NM_000245.2,Yes,No MET,H1094R,Oncogenic,Gain-of-function,4,ENST00000397752,NM_000245.2,Yes,No
MET,Y1230H,Oncogenic,Gain-of-function,4,ENST00000397752,NM_000245.2,Yes,No MET,Y1230H,Oncogenic,Gain-of-function,4,ENST00000397752,NM_000245.2,Yes,No
MET,Exon 14 Deletion,Likely Oncogenic,Likely Gain-of-function,4,ENST00000397752,NM_000245.2,Yes,No MET,Exon 14 Skipping Mutation,Likely Oncogenic,Likely Gain-of-function,4,ENST00000397752,NM_000245.2,Yes,No
MET,Exon 14 skipping mutation,Likely Oncogenic,Likely Gain-of-function,4,ENST00000397752,NM_000245.2,Yes,No
MET,Exon 14 deletions,Likely Oncogenic,Likely Gain-of-function,4,ENST00000397752,NM_000245.2,Yes,No MET,Exon 14 deletions,Likely Oncogenic,Likely Gain-of-function,4,ENST00000397752,NM_000245.2,Yes,No
MET,D1228N,Likely Oncogenic,Likely Gain-of-function,4,ENST00000397752,NM_000245.2,Yes,No MET,D1228N,Likely Oncogenic,Likely Gain-of-function,4,ENST00000397752,NM_000245.2,Yes,No
MET,X1010_splice,Likely Oncogenic,Likely Gain-of-function,4,ENST00000397752,NM_000245.2,Yes,No MET,X1010_splice,Likely Oncogenic,Likely Gain-of-function,4,ENST00000397752,NM_000245.2,Yes,No

Can't render this file because it is too large.

View File

@ -302,14 +302,24 @@ task hotspot {
vcf_operations.py ${output_dir}/mutation/${name}.raw.hotspot.vcf \ vcf_operations.py ${output_dir}/mutation/${name}.raw.hotspot.vcf \
-b $PUBLIC/hotspot/hotspot_delins.bed \ -b $PUBLIC/hotspot/hotspot_delins.bed \
-o ${output_dir}/mutation/${name}.hotspot.delins.vcf -o ${output_dir}/mutation/${name}.raw.hotspot.delins.vcf
vcf_operations.py ${output_dir}/mutation/${name}.raw.hotspot.vcf \
-b $PUBLIC/hotspot/hotspot_snpindel.bed \
-o ${output_dir}/mutation/${name}.raw.hotspot.snpindel.vcf \
-s
vcf_operations.py ${output_dir}/mutation/${name}.raw.hotspot.vcf \ vcf_operations.py ${output_dir}/mutation/${name}.raw.hotspot.vcf \
-v $PUBLIC/hotspot/hotspot_snv.vcf \ -v $PUBLIC/hotspot/hotspot_snv.vcf \
-o ${output_dir}/mutation/${name}.hotspot.snv.vcf -o ${output_dir}/mutation/${name}.raw.hotspot.snv.vcf
vcf_operations.py ${output_dir}/mutation/${name}.hotspot.snv.vcf \ vcf_operations.py ${output_dir}/mutation/${name}.raw.hotspot.snv.vcf \
-v ${output_dir}/mutation/${name}.hotspot.delins.vcf \ -v ${output_dir}/mutation/${name}.raw.hotspot.delins.vcf \
-o ${output_dir}/mutation/${name}.raw.hotspot.snv_delins.vcf \
-m
vcf_operations.py ${output_dir}/mutation/${name}.raw.hotspot.snv_delins.vcf \
-v ${output_dir}/mutation/${name}.raw.hotspot.snpindel.vcf \
-o ${output_dir}/mutation/${name}.hotspot.vcf \ -o ${output_dir}/mutation/${name}.hotspot.vcf \
-m -m

View File

@ -17,9 +17,6 @@ task run_chemo {
>>> >>>
output {
String run_chemo_res = "${output_dir}/chemo/${name}.drug.res.txt"
}
} }
workflow call_chemo { workflow call_chemo {

View File

@ -1,3 +1,4 @@
# fusion
task rmdup_picard { task rmdup_picard {
String name String name

View File

@ -15,9 +15,6 @@ task run_hereditary {
>>> >>>
output {
String run_hereditary_txt = "${output_dir}/hereditary/${name}.hereditary.txt"
}
} }
workflow call_hereditary { workflow call_hereditary {

View File

@ -19,9 +19,6 @@ task msi_single {
-o ${output_dir}/msi/${name}.msi.txt -o ${output_dir}/msi/${name}.msi.txt
>>> >>>
output {
String run_msi_txt = "${output_dir}/msi/${name}.msi.txt"
}
} }
task msi_paired { task msi_paired {
@ -46,10 +43,6 @@ task msi_paired {
>>> >>>
output {
String run_msi_txt = "${output_dir}/msi/${name}.msi.txt"
}
} }
workflow call_msi { workflow call_msi {

View File

@ -1,3 +1,4 @@
# neoantigen
task run_neoantigen { task run_neoantigen {
String tumor String tumor

View File

@ -1,3 +1,4 @@
# pollution
task run_pollution { task run_pollution {
String name String name
@ -20,10 +21,6 @@ task run_pollution {
-c $PUBLIC/pollution/${probe}_contaminate_cnvkit.bed -c $PUBLIC/pollution/${probe}_contaminate_cnvkit.bed
>>> >>>
output {
String run_pollution_res = "${output_dir}/pollution/${name}_pollution.csv"
}
} }
workflow call_pollution { workflow call_pollution {

View File

@ -27,9 +27,6 @@ task run_post {
>>> >>>
output {
String run_merged = "${output_dir}/report/${name}.merged_file.xlsx"
}
} }
workflow call_postprocess { workflow call_postprocess {

View File

@ -6,6 +6,7 @@ task run_tmb {
String project String project
String sample_type String sample_type
String output_dir String output_dir
command <<< command <<<
if [ ! -d ${output_dir}/tmb ];then if [ ! -d ${output_dir}/tmb ];then
@ -20,9 +21,6 @@ task run_tmb {
tmb tmb
>>> >>>
output {
String run_tmb_txt = "${output_dir}/tmb/${name}.tmb.txt"
}
} }
workflow call_tmb { workflow call_tmb {