2023-12-19 13:37:52 +08:00
|
|
|
#!/usr/bin/perl
|
|
|
|
|
use strict;
|
|
|
|
|
use warnings;
|
|
|
|
|
|
|
|
|
|
die "usage:perl $0 germline/somatic sample_type name input_file output_file" unless @ARGV == 5;
|
|
|
|
|
my ($type, $sample_type, $name, $input, $output) = @ARGV;
|
|
|
|
|
|
|
|
|
|
open IN, $input or die "Cannot open file $input: $!";
|
|
|
|
|
open OUT, ">${output}";
|
|
|
|
|
|
|
|
|
|
if ($type eq "germline") {
|
|
|
|
|
while (<IN>) {
|
|
|
|
|
if (/^##/) {
|
|
|
|
|
print OUT;
|
|
|
|
|
next;
|
|
|
|
|
}
|
|
|
|
|
elsif (/^#CHROM/) {
|
2023-12-21 10:22:54 +08:00
|
|
|
$_ =~ s/(\S+)-match/NORMAL/;
|
|
|
|
|
print OUT "##FILTER=<ID=noise,Description=\"noise, JM add\">\n";
|
|
|
|
|
print OUT "##FILTER=<ID=MSI_u_x_r_x,Description=\"MSI_u_x_r_x, JM add\">\n";
|
|
|
|
|
print OUT "##FILTER=<ID=strandBias,Description=\"strandBias, JM add\">\n";
|
|
|
|
|
print OUT "##FILTER=<ID=multi_alt,Description=\"multi_alt, JM add\">\n";
|
|
|
|
|
print OUT $_;
|
2023-12-19 13:37:52 +08:00
|
|
|
next;
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
chomp;
|
|
|
|
|
my @line = split("\t", $_);
|
|
|
|
|
next if $line[6] ne "PASS";
|
|
|
|
|
|
|
|
|
|
if ($sample_type eq 'c') {
|
|
|
|
|
my $n_af = (split(":", $line[-1]))[6];
|
|
|
|
|
print OUT "$_\n" if $n_af >= 0.1;
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
my $n_af = (split(":", $line[-1]))[6];
|
|
|
|
|
my $t_af = (split(":", $line[-2]))[6];
|
|
|
|
|
print OUT "$_\n" if ($n_af >= 0.1 and $t_af >= 0.1);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
elsif ($type eq "somatic") {
|
|
|
|
|
while (<IN>) {
|
|
|
|
|
if (/^##/) {
|
2023-12-21 10:22:54 +08:00
|
|
|
print OUT $_;
|
2023-12-19 13:37:52 +08:00
|
|
|
next;
|
|
|
|
|
}
|
|
|
|
|
elsif (/^#CHROM/) {
|
2023-12-21 10:22:54 +08:00
|
|
|
$_ =~ s/(\S+)-match/NORMAL/;
|
|
|
|
|
print OUT "##FILTER=<ID=noise,Description=\"noise, JM add\">\n";
|
|
|
|
|
print OUT "##FILTER=<ID=MSI_u_x_r_x,Description=\"MSI_u_x_r_x, JM add\">\n";
|
|
|
|
|
print OUT "##FILTER=<ID=strandBias,Description=\"strandBias, JM add\">\n";
|
|
|
|
|
print OUT "##FILTER=<ID=multi_alt,Description=\"multi_alt, JM add\">\n";
|
|
|
|
|
print OUT $_;
|
2023-12-19 13:37:52 +08:00
|
|
|
next;
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
chomp;
|
|
|
|
|
my @line = split("\t", $_);
|
2023-12-21 10:22:54 +08:00
|
|
|
# next if $line[6] ne "PASS";
|
|
|
|
|
my $filters = split(";", $line[6]);
|
|
|
|
|
# 不是pass 低频的; 或者 0.02 < 0.05 之间 个数大于2
|
2023-12-19 13:37:52 +08:00
|
|
|
if ($sample_type eq 'c') {
|
|
|
|
|
my $t_af = (split(":", $line[-1]))[6];
|
2023-12-21 10:22:54 +08:00
|
|
|
if (!($line[6] ne 'PASS' and ($t_af < 0.02 or ($t_af >= 0.02 and $t_af < 0.05 and $filters >= 2)))) {
|
|
|
|
|
print OUT "$_\n" if $t_af >= 0.02;
|
|
|
|
|
}
|
|
|
|
|
|
2023-12-19 13:37:52 +08:00
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
my $n_af = (split(":", $line[-1]))[6];
|
|
|
|
|
my $t_af = (split(":", $line[-2]))[6];
|
2023-12-21 10:22:54 +08:00
|
|
|
if (!($line[6] ne 'PASS' and ($t_af < 0.02 or ($t_af >= 0.02 and $t_af < 0.05 and $filters >= 2)))) {
|
|
|
|
|
print OUT "$_\n" if ($n_af < 0.02 and $t_af >= 0.05);
|
|
|
|
|
}
|
|
|
|
|
|
2023-12-19 13:37:52 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
die "erro type!!! somatic or germline for now"
|
|
|
|
|
}
|